GitLab CI for Docker builds is enabled! You can turn it on by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit eff6afef authored by jorgemlferreira
Browse files

[ADD] Function for molecular features

[ADD] .CSV with protein features for the domains
parent 32cb77d0
This diff is collapsed.
......@@ -46,16 +46,20 @@ def proteins(df_with_chembl_and_canonical_smiles, df=None):
'Moran': __get_descriptor(protein_object.GetMoranAuto),
'APAAC': __get_descriptor(protein_object.GetAPAAC),
'QSO': __get_descriptor(protein_object.GetQSO),
'Triad': __get_descriptor(protein_object.GetTriad),
'SOCN': __get_descriptor(protein_object.GetSOCN),
'Tripeptide': __get_descriptor(protein_object.GetTPComp)}
# 'Triad': __get_descriptor(protein_object.GetTriad),
# 'SOCN': __get_descriptor(protein_object.GetSOCN),
'Tripeptide': __get_descriptor(protein_object.GetTPComp),
'Pseudo AA': __get_descriptor(protein_object.GetPAAC),
'Moreau': __get_descriptor(protein_object.GetMoreauBrotoAuto),
'Geary': __get_descriptor(protein_object.GetGearyAuto)}
return pd.Series(descriptors_functions)
'''End of auxiliary functions'''
descriptors = pd.Series(['AAComp', 'CTD', 'DPComp', 'APAAC', 'Moran', 'QSO', 'Triad', 'SOCN', 'Tripeptide'])
descriptors = pd.Series(['AAComp', 'CTD', 'DPComp', 'APAAC', 'Moran', 'QSO', 'Triad', 'SOCN', 'Tripeptide', 'Pseudo AA', 'Moreau',
'Geary'])
if df is None:
df = pd.DataFrame(data=None, columns=pd.Series(df_with_chembl_and_canonical_smiles['accession'].unique()),
df = pd.DataFrame(data=None, columns=pd.Series(df_with_chembl_and_canonical_smiles['domain_sequence']),
index=descriptors)
else:
df = df
......@@ -66,8 +70,9 @@ def proteins(df_with_chembl_and_canonical_smiles, df=None):
if df.loc['AAComp'][protein] is np.nan:
print('Redoing the analysis')
try:
protein_object = po.GetProteinSequenceFromID(protein)
po.ReadProteinSequence(protein_object)
# protein_object = po.protein.ReadProteinSequence(protein)
# protein_object = po.GetProteinSequenceFromID(protein)
po.ReadProteinSequence(protein)
df[protein] = pd.Series(_protein_descriptors(po))
except Exception as e:
print(e)
......@@ -77,19 +82,16 @@ def proteins(df_with_chembl_and_canonical_smiles, df=None):
if __name__ == '__main__':
chembl = pd.read_csv('chembl23_selected.csv')
df = pd.read_csv('protein_descriptors.csv',index_col=1,)
df.index = df['Unnamed: 0.1']
df_final = df.iloc[:,2:].T
x,v = proteins(chembl, df_final)
#todo
asd = _create_matrix_with_data(df_final,'Tripeptide')
#todo
x.to_csv('protein_descriptors.csv')
pd.Series(v).to_csv('missing_proteins.txt')
# dfxx = pd.DataFrame(np.concatenate(
# [x.reshape(1, -1) if isinstance(x, np.ndarray) else np.array([np.nan] * 167).reshape(1,-1) for x in
# (qwer.MACCS).tolist()], axis=0)).set_index(qwer.iloc[:,0])
\ No newline at end of file
print('not needed so far')
# chembl = pd.read_csv('chembl23_selected.csv')
#
# df = pd.read_csv('protein_descriptors.csv',index_col=1,)
# df.index = df['Unnamed: 0.1']
# df_final = df.iloc[:,2:].T
# x,v = proteins(chembl, df_final)
# #todo
# asd = _create_matrix_with_data(df_final,'Tripeptide')
# #todo
# x.to_csv('protein_descriptors.csv')
# pd.Series(v).to_csv('missing_proteins.txt')
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment