GitLab CI for Docker builds is enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit f6494c93 authored by Diogo Batista Lima
Browse files

Delete reg_network_retrieval.py

parent f9656a4a
from openpyxl import load_workbook
from neo4j.v1 import GraphDatabase
import numpy as np
import pandas as pd
import itertools
def convert_xls_to_pandas_dataframe(xls_file, optional_csv_file=None):
    """Load the active sheet of an Excel workbook into a pandas DataFrame.

    The first row of the sheet supplies the column labels and the first
    column supplies the row index; the top-left cell is discarded.

    Args:
        xls_file: Workbook handed to openpyxl's ``load_workbook``.
        optional_csv_file: When truthy, the DataFrame is also written to
            this location with ``DataFrame.to_csv``.

    Returns:
        The DataFrame built from the active sheet.

    Raises:
        ValueError: If the active sheet yields no rows at all (previously a
            bare ``StopIteration`` escaped from ``next``).
    """
    workbook = load_workbook(xls_file)
    rows = workbook.active.values

    # The header row is consumed first; everything after it is data.
    header = next(rows, None)
    if header is None:
        raise ValueError(
            "the active worksheet of {!r} is empty".format(xls_file)
        )

    body = list(rows)
    frame = pd.DataFrame(
        [row[1:] for row in body],      # cell values without the index column
        index=[row[0] for row in body],  # first column becomes the index
        columns=header[1:],
    )

    if optional_csv_file:
        frame.to_csv(optional_csv_file)
    return frame
# Runs at import time: converts the regulatory-network workbook to a CSV
# file so it can be processed further in the accompanying Jupyter notebook.
bs_dataframe = convert_xls_to_pandas_dataframe(
    'Bacillus_subtilis_reg_network.xlsx',
    optional_csv_file="Bs_reg.csv",
)
def convert_csv_to_dict(csv_file, encoding="ISO-8859-1"):
    """Read a regulatory-data CSV and group its rows by the first column.

    The ``"Unnamed: 0"`` column (the index column pandas writes by default)
    is dropped when present.  The first remaining column is the entity
    identifier; every other column becomes an attribute of that entity,
    stored as ``str(value)``.

    When the same identifier appears on several rows, each attribute value
    that differs from the text accumulated so far for that attribute is
    appended to it, separated by ``"|"``.

    Args:
        csv_file: Anything accepted by ``pandas.read_csv``.
        encoding: Text encoding forwarded to ``pandas.read_csv``.

    Returns:
        ``{identifier: {column_name: "value[|value...]"}}``.
    """
    table = pd.read_csv(csv_file, encoding=encoding)
    # errors="ignore": a CSV written without an index column has no
    # "Unnamed: 0" header; that is not a reason to fail.
    table = table.drop("Unnamed: 0", axis=1, errors="ignore")
    attribute_names = list(table.columns[1:])  # each entity's attributes

    merged = {}
    for row in table.values.tolist():
        entity_id = row[0]
        values = [str(cell) for cell in row[1:]]

        record = merged.get(entity_id)
        if record is None:
            # First time this identifier is seen: store the row as-is.
            merged[entity_id] = dict(zip(attribute_names, values))
            continue

        for name, value in zip(attribute_names, values):
            # NOTE(review): the comparison is against the whole accumulated
            # string, so "a|b" followed by "a" becomes "a|b|a". Kept as-is;
            # confirm whether per-value de-duplication is wanted.
            if value != record[name]:
                record[name] += "|" + value
    return merged
def inject_genes_neo4j(gene_dict, uri="bolt://localhost:7687", auth=("neo4j", "admin")):
    """Create one ``Gene`` node in Neo4j for every entry of *gene_dict*.

    Args:
        gene_dict: Dictionary generated by ``convert_csv_to_dict()``; keys
            are gene ids and each value must contain the columns listed in
            ``columns`` below (a missing column raises ``KeyError``).
        uri: Bolt URI of the Neo4j server (defaults to the local server).
        auth: ``(user, password)`` tuple passed to the driver.

    The session and the driver are always closed, even when a statement
    fails part-way through.
    """
    # NOTE(review): the property names "regulatory_mecanism" and
    # "contitioned_rules" are misspelled but are left untouched because
    # existing data or queries may depend on them.
    # NOTE(review): "{name}" is the legacy Cypher parameter syntax; confirm
    # the server version before switching to "$name".
    query = (
        "CREATE (g:Gene {gene_id:{id},gene_name:{name},"
        "operon:{op},sigma_factor:{sigma},sigma_factor_number:{sigma_n},"
        "regulator_name:{reg_name},regulator_number:{reg_number},regulation_sign:{reg_sign},"
        "involved_metabolite:{metab},metabolite_number:{metab_number},metabolite_sign:{metab_sign},"
        "regulatory_mecanism:{reg_mec},contitioned_rules:{cond_rules},annotation:{annot}})"
    )
    # Cypher parameter name -> column name in gene_dict's inner dictionaries.
    columns = (
        ("name", "% Gene_Name "),
        ("op", "% Operon "),
        ("sigma", "Sigma factor"),
        ("sigma_n", "Sigma factor number"),
        ("reg_name", "Regulator(s) name"),
        ("reg_number", "Regulator number"),
        ("reg_sign", "Regulation sign"),
        ("metab", "Involved Metabolite(s)"),
        ("metab_number", "Metabolite(s) number"),
        ("metab_sign", "Metabolite(s) sign"),
        ("reg_mec", "Regulatory mecanisms"),
        ("cond_rules", "Conditioned rules"),
        ("annot", "Annotation"),
    )

    driver = GraphDatabase.driver(uri, auth=auth)
    try:
        session = driver.session()
        try:
            for gene, attributes in gene_dict.items():
                parameters = {param: attributes[column] for param, column in columns}
                session.run(query, id=gene, **parameters)
        finally:
            session.close()
    finally:
        driver.close()
def inject_regulators_neo4j(regulator_dict, uri="bolt://localhost:7687", auth=("neo4j", "admin")):
    """Create one ``Regulator`` node in Neo4j for every entry of *regulator_dict*.

    Args:
        regulator_dict: Dictionary generated by ``convert_csv_to_dict()``;
            keys are regulator names and each value must contain the columns
            listed in ``columns`` below (a missing column raises
            ``KeyError``).
        uri: Bolt URI of the Neo4j server (defaults to the local server).
        auth: ``(user, password)`` tuple passed to the driver.

    The session and the driver are always closed, even when a statement
    fails part-way through.
    """
    # NOTE(review): "{name}" is the legacy Cypher parameter syntax; confirm
    # the server version before switching to "$name".
    query = (
        "CREATE (r:Regulator {reg_name:{name},reg_id:{id},reg_number:{number},"
        "mechanism:{mech},conditioned_rules:{cond},metabolite:{metab},"
        "metabolite_number:{metab_number},metabolite_sign:{metab_sign},comment:{com}})"
    )
    # Cypher parameter name -> column name in regulator_dict's inner dictionaries.
    columns = (
        ("id", "BSU"),
        ("number", "Number"),
        ("mech", "Mechanism"),
        ("cond", "conditioned_rules"),
        ("metab", "metabolite"),
        ("metab_number", "metabolite_number"),
        ("metab_sign", "metabolite_sign"),
        ("com", "Comment"),
    )

    driver = GraphDatabase.driver(uri, auth=auth)
    try:
        session = driver.session()
        try:
            for regulator, attributes in regulator_dict.items():
                parameters = {param: attributes[column] for param, column in columns}
                session.run(query, name=regulator, **parameters)
        finally:
            session.close()
    finally:
        driver.close()
def match_gene_with_regulator(gene_dict):
    """Pair every gene with each of its regulator numbers.

    Args:
        gene_dict: Dictionary generated by ``convert_csv_to_dict()``; each
            value must hold a ``"Regulator number"`` string in which several
            regulators are separated by ``"|"``.

    Returns:
        A list of ``(gene, regulator_number)`` tuples, in the iteration
        order of *gene_dict* and, within a gene, in the order the regulator
        numbers appear in the string.
    """
    relationship = []
    for gene, attributes in gene_dict.items():
        # split() already returns a one-element list when there is no "|",
        # so the single-regulator case needs no separate branch.
        regulators = attributes["Regulator number"].split("|")
        relationship.extend((gene, regulator) for regulator in regulators)
    return relationship
def connect_gene_regulator_neo4j(relationship, uri="bolt://localhost:7687", auth=("neo4j", "admin")):
    """Create regulator -> gene edges in Neo4j for each pair in *relationship*.

    For every ``(gene_id, regulator_number)`` pair two statements are run:
    one creates a ``REPRESSES`` edge when the gene's ``regulation_sign`` is
    ``"-1"``, the other an ``ACTIVATES`` edge when it is ``"1"``.

    Args:
        relationship: List of ``(gene_id, regulator_number)`` tuples as
            generated by ``match_gene_with_regulator()``.
        uri: Bolt URI of the Neo4j server (defaults to the local server).
        auth: ``(user, password)`` tuple passed to the driver.

    The session and the driver are always closed, even when a statement
    fails part-way through.
    """
    # NOTE(review): a gene whose regulation_sign holds several merged values
    # (e.g. "1|-1") matches neither statement, so no edge is created for it.
    repress_query = (
        "MATCH (g:Gene),(r:Regulator) WHERE g.gene_id={gene} and r.reg_number={regulator} "
        "and g.regulation_sign={repress}"
        " CREATE (r)-[re:REPRESSES]->(g)"
    )
    # The original text was missing the space before "and" in this
    # statement ("{regulator}and"); it now mirrors the REPRESSES query.
    activate_query = (
        "MATCH (g:Gene),(r:Regulator) WHERE g.gene_id={gene} and r.reg_number={regulator} "
        "and g.regulation_sign={activate}"
        " CREATE (r)-[act:ACTIVATES]->(g)"
    )

    driver = GraphDatabase.driver(uri, auth=auth)
    try:
        session = driver.session()
        try:
            for gene, regulator in relationship:
                session.run(repress_query, gene=gene, regulator=regulator, repress="-1")
                session.run(activate_query, gene=gene, regulator=regulator, activate="1")
        finally:
            session.close()
    finally:
        driver.close()
if __name__ == "__main__":
    # 1. Genes: load the refined gene table and create the Gene nodes.
    dict_genes = convert_csv_to_dict("Bs_reg_refined.csv")
    inject_genes_neo4j(dict_genes)

    # 2. Regulators: the workbook is exported to "Regulators2.csv", but the
    #    nodes are built from "regulators_processed.csv" -- presumably a
    #    hand-processed version of that export; TODO confirm.
    regulators_dataframe = convert_xls_to_pandas_dataframe(
        'Regulators.xlsx',
        optional_csv_file="Regulators2.csv",
    )
    regulators = convert_csv_to_dict("regulators_processed.csv")
    inject_regulators_neo4j(regulators)

    # 3. Edges: link every gene to its regulator(s).
    relationship = match_gene_with_regulator(dict_genes)
    connect_gene_regulator_neo4j(relationship)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment