Git Lab CI for docker build enabled! You can enable it using .gitlab-ci.yml in your project. Check file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit c82612ac authored by Davide Lagoa's avatar Davide Lagoa
Browse files

ModelSEED relations using file created

parent 030482b8
This diff is collapsed.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>Biosynth</groupId>
......@@ -16,7 +17,7 @@
<url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository>
</repositories>
<build>
<plugins>
<!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId>
......@@ -54,6 +55,10 @@
<groupId>ch.qos.logback</groupId>
<artifactId>logback-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
......@@ -233,23 +238,20 @@
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-api -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.7</version>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.25</version>
</dependency>
<!-- https://mvnrepository.com/artifact/ch.qos.logback/logback-classic -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.1.7</version>
<version>1.2.3</version>
</dependency>
</dependencies>
</project>
......@@ -17,18 +17,6 @@ public class BuildDatabase {
GraphDatabaseService graphDatabaseService = HelperNeo4jConfigInitializer
.initializeNeo4jDataDatabaseConstraints("C:\\Users\\Davide\\Documents\\BASE DE DADOS BIOSYNTH\\db3_0");
// System.out.println("opening ModelSEED...");
//
// EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t0 = ModelSEED.getMetaboliteTransform();
//
// MetaboliteDao<ModelSeedMetaboliteEntity> dao0 = ModelSEED.getMetaboliteDatabase();
//
// System.out.println("transforming...");
//
// ModelSEED.etl(dao0 , t0, graphDatabaseService);
//
// System.out.println("ModelSEED complete...");
//
// ///////////////////////////////////////////////////////////
// System.out.println();
// System.out.println("requesting kegg compounds...");
......@@ -84,25 +72,34 @@ public class BuildDatabase {
// ///////////////////////////////////////////////////////////
// System.out.println();
System.out.println("opening Metacyc ECOLI...");
EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t4 = Metacyc.getMetacycMetaboliteTransform();
// MetaboliteDao<BiggMetaboliteEntity> daoBigg = getBigg1MetaboliteDatabase();
MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc4 = Metacyc.getMetacycMetaboliteDao("ECOLI");
System.out.println("transforming...");
Metacyc.etl(daoBiocyc4 , t4, graphDatabaseService);
System.out.println("Metacyc ECOLI complete...");
// System.out.println("opening Metacyc ECOLI...");
//
// EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t4 = Metacyc.getMetacycMetaboliteTransform();
// MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc4 = Metacyc.getMetacycMetaboliteDao("ECOLI");
//
// System.out.println("transforming...");
//
// Metacyc.etl(daoBiocyc4 , t4, graphDatabaseService);
//
// System.out.println("Metacyc ECOLI complete...");
//////////////////////////////////////////////
// System.out.println("requesting kegg drugs...");
// System.out.println("opening ModelSEED...");
//
// etl(getKeggDrugMetaboliteDao() , getKeggDrugTransform(), graphDatabaseService);
System.out.println("database load complete...");
// EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t0 = ModelSEED.getMetaboliteTransform();
//
// MetaboliteDao<ModelSeedMetaboliteEntity> dao0 = ModelSEED.getMetaboliteDatabase();
//
// System.out.println("transforming...");
//
// ModelSEED.etl(dao0 , t0, graphDatabaseService);
//
// System.out.println("ModelSEED complete...");
//
ModelSEED.createModelSEEDRelationships(graphDatabaseService);
// System.out.println("database load complete...");
//
graphDatabaseService.shutdown();
System.out.println("shutdown...");
......
package biosynth;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource;
import files.FilesUtils;
import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing;
import pt.uminho.sysbio.biosynth.integration.etl.DefaultMetaboliteEtlExtract;
......@@ -12,36 +27,42 @@ import pt.uminho.sysbio.biosynth.integration.etl.EtlTransform;
import pt.uminho.sysbio.biosynth.integration.etl.HbmNeo4jHybridMetaboliteEtlPipeline;
import pt.uminho.sysbio.biosynth.integration.etl.HeterogenousMetaboliteEtlLoad;
import pt.uminho.sysbio.biosynth.integration.etl.biodb.ModelSeedMetaboliteTransform;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.Neo4jGraphMetaboliteDaoImpl;
import pt.uminho.sysbio.biosynth.integration.neo4j.BiodbMetaboliteNode;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
import pt.uminho.sysbio.biosynthframework.Metabolite;
import pt.uminho.sysbio.biosynthframework.chemanalysis.cdk.CdkWrapper;
import pt.uminho.sysbio.biosynthframework.io.MetaboliteDao;
import utilities.triage_utilities.Utilities;
public class ModelSEED {
// public static void main(String[] args) {
//
// System.out.println("opening...");
//
// EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t = getMetaboliteTransform();
//
// MetaboliteDao<ModelSeedMetaboliteEntity> dao = getMetaboliteDatabase();
//
// System.out.println("transforming...");
//
// etl(dao , t);
//
// System.out.println("finnish...");
//
// }
// Logger for this class. It was previously created with WriteByMetabolitesID.class,
// which filed every ModelSEED message under the wrong logger category.
private static final Logger logger = LoggerFactory.getLogger(ModelSEED.class);
// public static void main(String[] args) {
//
// System.out.println("opening...");
//
// EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t = getMetaboliteTransform();
//
// MetaboliteDao<ModelSeedMetaboliteEntity> dao = getMetaboliteDatabase();
//
// System.out.println("transforming...");
//
// etl(dao , t);
//
// System.out.println("finnish...");
//
// }
public static Json2ModelSeedMetaboliteDaoImpl getMetaboliteDatabase() {
Json2ModelSeedMetaboliteDaoImpl dao = new Json2ModelSeedMetaboliteDaoImpl(new FileSystemResource(new File("C:\\\\Users\\\\Davide\\\\Desktop\\\\compounds.json")));
Json2ModelSeedMetaboliteDaoImpl dao = new Json2ModelSeedMetaboliteDaoImpl(new FileSystemResource(new File("C:\\Users\\Davide\\Desktop\\compounds.json")));
return dao;
}
public static ModelSeedMetaboliteTransform getMetaboliteTransform() {
......@@ -68,7 +89,148 @@ public class ModelSEED {
etlPipeline.setLoadSubsystem(new HeterogenousMetaboliteEtlLoad<GraphMetaboliteEntity>(dst));
etlPipeline.setTransformSubsystem(t);
etlPipeline.etl();
}
/**
 * Creates "has_crossreference_to" relationships from ModelSEED compound nodes to the
 * external compound nodes listed in the aliases file (see {@link #readSEEDRelationshipsFile()}).
 *
 * <p>All relationships are created inside a single transaction, which is marked successful
 * only after every entry was processed and is always closed, even if an unexpected
 * exception escapes the loop. Compounds that cannot be found (on either side) are logged
 * and skipped.
 *
 * <p>NOTE(review): this method calls {@code shutdown()} on the {@link BiodbGraphDatabaseService}
 * wrapper when it finishes; presumably that also shuts down the wrapped database, so callers
 * should not use {@code graphDatabaseService} afterwards - confirm against the wrapper.
 *
 * @param graphDatabaseService the Neo4j database holding the metabolite nodes
 */
public static void createModelSEEDRelationships(GraphDatabaseService graphDatabaseService) {

	Map<MetaboliteMajorLabel, Map<String, Set<String>>> data = readSEEDRelationshipsFile();

	BiodbGraphDatabaseService service;

	// try-with-resources guarantees the transaction is closed on every path
	try (Transaction dataTx = graphDatabaseService.beginTx()) {

		service = new BiodbGraphDatabaseService(graphDatabaseService);

		for (Map.Entry<MetaboliteMajorLabel, Map<String, Set<String>>> byLabel : data.entrySet()) {

			MetaboliteMajorLabel label = byLabel.getKey();

			for (Map.Entry<String, Set<String>> byCompound : byLabel.getValue().entrySet()) {

				String modelSEEDid = byCompound.getKey();

				BiodbMetaboliteNode node = service.getMetabolite(modelSEEDid, MetaboliteMajorLabel.ModelSeed);

				if (node == null) {
					logger.warn("Relationship NOT created for ModelSEED compound {}", modelSEEDid);
					continue;
				}

				for (String externalID : byCompound.getValue()) {

					try {
						BiodbMetaboliteNode node2 = service.getMetabolite(externalID, label);

						// Check the looked-up target explicitly instead of relying on
						// createRelationshipTo(null, ...) throwing inside the transaction.
						if (node2 == null) {
							logger.warn("Compound {} with source {} not found to create relationship with ModelSEED compound {}",
									externalID, label, modelSEEDid);
							continue;
						}

						node.createRelationshipTo(node2, RelationshipType.withName("has_crossreference_to"));

						logger.info("Relationship created between compound {} with source {} and ModelSEED compound {}",
								externalID, label, modelSEEDid);
					}
					catch (Exception e) {
						// Best effort: one failing cross-reference must not abort the whole load,
						// but the cause is now logged instead of being discarded.
						logger.warn("Could not create relationship between compound {} with source {} and ModelSEED compound {}",
								externalID, label, modelSEEDid, e);
					}
				}
			}
		}

		dataTx.success();

		logger.info("All reationships created!");
	}

	service.shutdown();
}
/**
 * Reads the tab-separated ModelSEED aliases file
 * ({@code <user.dir>/src/main/resources/Compounds_Aliases.tsv}) and groups the external
 * identifiers by target database label and ModelSEED compound id.
 *
 * <p>Two line layouts are accepted:
 * <ul>
 * <li>four or more columns: column 0 is the ModelSEED id, column 2 the external id and
 * column 3 the source;</li>
 * <li>exactly three columns: column 0 is the ModelSEED id, column 1 the external id and
 * column 2 the source (assumed layout - TODO confirm against the actual file).</li>
 * </ul>
 * Lines with fewer than three columns are logged and skipped. Lines whose source is not one
 * of BiGG, BiGG1, KEGG or MetaCyc (this includes a header line), or whose id or external id
 * is empty, are ignored. MetaCyc identifiers are prefixed with {@code "META:"}; KEGG
 * identifiers starting with {@code "G"} are filed under the glycan label.
 *
 * @return map from database label to (ModelSEED id to set of external ids); empty, or
 *         partially filled, if the file cannot be read (the failure is logged)
 */
public static Map<MetaboliteMajorLabel, Map<String, Set<String>>> readSEEDRelationshipsFile() {

	Map<MetaboliteMajorLabel, Map<String, Set<String>>> data = new HashMap<>();

	// File.separator yields the same path as before on Windows and a valid one elsewhere
	String path = System.getProperty("user.dir") + File.separator + "src" + File.separator + "main"
			+ File.separator + "resources" + File.separator + "Compounds_Aliases.tsv";

	logger.info("Reading external references file at: {}", path);

	// try-with-resources closes the reader even when reading fails half-way
	try (BufferedReader reader = new BufferedReader(new FileReader(path))) {

		String line;

		while ((line = reader.readLine()) != null) {

			if (line.isEmpty())
				continue;

			String[] text = line.split("\t");

			String id;
			String externalID;
			String source;

			// Decide on the layout BEFORE indexing; the previous code read text[3]
			// unconditionally and crashed on three-column lines.
			if (text.length >= 4) {
				id = text[0].trim();
				externalID = text[2].trim();
				source = text[3].trim();
			}
			else if (text.length == 3) {
				id = text[0].trim();
				externalID = text[1].trim();
				source = text[2].trim();
			}
			else {
				logger.warn("Skipping malformed line in external references file: {}", line);
				continue;
			}

			MetaboliteMajorLabel label = null;

			// NOTE(review): "BiGG1" is mapped to the BiGG2 label and "BiGG" to BiGG, exactly as
			// in the previous implementation - confirm this mapping is intended.
			if (source.equalsIgnoreCase("BiGG1")) {
				label = MetaboliteMajorLabel.BiGG2;
			}
			else if (source.equalsIgnoreCase("KEGG")) {
				label = externalID.startsWith("G") ? MetaboliteMajorLabel.LigandGlycan : MetaboliteMajorLabel.LigandCompound;
			}
			else if (source.equalsIgnoreCase("MetaCyc")) {
				label = MetaboliteMajorLabel.MetaCyc;
				externalID = "META:".concat(externalID);
			}
			else if (source.equalsIgnoreCase("BiGG")) {
				label = MetaboliteMajorLabel.BiGG;
			}

			if (label != null && !id.isEmpty() && !externalID.isEmpty()) {
				data.computeIfAbsent(label, k -> new HashMap<>())
						.computeIfAbsent(id, k -> new HashSet<>())
						.add(externalID);
			}
		}
	}
	catch (IOException e) {
		// FileNotFoundException is an IOException; as before, whatever was read so far is returned
		logger.error("Unable to read external references file at: {}", path, e);
	}

	return data;
}
}
......@@ -27,6 +27,7 @@ import reactions.IdentifyReactionsMetabolites;
import reactions.TransportReactionsBuilder;
import relations.MetabolitesChilds;
import tcdb.capsules.BiosynthMetaboliteProperties;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.TcNumberContainer2;
import triageDatabase.PopulateTriageNeo4jDatabase;
import utilities.FileUtils;
......@@ -65,14 +66,16 @@ public class WriteByMetabolitesID {
BiosynthMetabolites namesAndIDsContainer = getBiosynthDataByName(service, true); //154225 names
// BiosynthMetabolites namesAndIDsContainer = null;
//
// FileUtils.saveMapInFile2("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getMetabolitesIDs5.txt", namesAndIDsContainer.getMetabolitesIDs());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase5.txt", namesAndIDsContainer.getNamesLowerCase());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns5.txt", namesAndIDsContainer.getNamesLowerCaseWithoutSigns());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns5.txt", namesAndIDsContainer.getNamesWithoutSigns());
// FileUtils.saveMapInFile2("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getMetabolitesIDs3.txt", namesAndIDsContainer.getMetabolitesIDs());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase3.txt", namesAndIDsContainer.getNamesLowerCase());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns3.txt", namesAndIDsContainer.getNamesLowerCaseWithoutSigns());
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns3.txt", namesAndIDsContainer.getNamesWithoutSigns());
Map<String, Set<TcNumberContainer2>> reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
Map<String, Set<TcNumberContainer2>> reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile(); //uncomment
// Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
//
// reactionsData2.put("O51235", reactionsData.get("O51235"));
......@@ -86,8 +89,8 @@ public class WriteByMetabolitesID {
int n = 1;
while (n != 99) {
test(namesAndIDsContainer, data, service, null, reactionsData, null);
test(namesAndIDsContainer, data, service, null, reactionsData, null); //uncomment
System.out.println("Enter a random number to repeat (100 to repeat data retrieval) or 99 to finish: ");
......@@ -110,34 +113,59 @@ public class WriteByMetabolitesID {
e.printStackTrace();
}
}
public static void test(BiosynthMetabolites namesAndIDsContainer, Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service, Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs,
Map<String, Set<TcNumberContainer2>> reactionsData2, Map<String, String[]> forChildsSearch) {
Map<String, Set<TcNumberContainer2>> reactionsData, Map<String, String[]> forChildsSearch) {
try {
// Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
//
// reactionsData2.put("Q1D027", reactionsData.get("Q1D027"));
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData2, namesAndIDsContainer, service);
// reactionsData2.put("P0AAG8", reactionsData.get("P0AAG8"));
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData, namesAndIDsContainer, service);
tcdbMetabolitesIDs = metabolitesIdentification.getTcdbMetabolitesIDs();
forChildsSearch = metabolitesIdentification.getforChildsSearch();
forChildsSearch = metabolitesIdentification.getforChildsSearch(); //uncomment
// Map<String, Set<TcNumberContainer2>> reactionsData3 = new HashMap<>();
// reactionsData3.put("O51235", reactionsData.get("O51235"));
/////TRIAGE
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData2, forChildsSearch, service, data).getResults();
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data, namesAndIDsContainer.getMetabolitesIDs()).getResults(); //uncomment
// JSONFilesUtils.writeJSONTriageReactions(newData);
// Map<String, Set<TcNumberContainer2>> newData2 = new Has
new PopulateTriageNeo4jDatabase(newData);
///////////////////////////
Set<TcNumberContainer2> containers = newData.get("P0AAG8");
for(TcNumberContainer2 container : containers) {
if(container.getTcNumber().equalsIgnoreCase("3.A.1.2.3")) {
Set<Integer> ids = container.getAllReactionsIds();
for(Integer id : ids) {
ReactionContainer rcont = container.getReactionContainer(id);
System.out.println(rcont.getReaction());
System.out.println(rcont.getReactionMetaCyc());
System.out.println(rcont.getReactionModelSEED());
System.out.println(rcont.getReactionKEGG());
}
}
}
////////////////////////////
// new PopulateTriageNeo4jDatabase(newData); //uncomment
////COUNTS
......@@ -592,11 +620,11 @@ public class WriteByMetabolitesID {
if(useCache) {
namesLowerCaseWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns3.txt");
namesWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns3.txt");
namesLowerCase = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase3.txt");
namesLowerCaseWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns5.txt");
namesWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns5.txt");
namesLowerCase = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase5.txt");
compounds = FileUtils.readMapFromFile2("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getMetabolitesIDs3.txt");
compounds = FileUtils.readMapFromFile2("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getMetabolitesIDs5.txt");
return new BiosynthMetabolites(compounds, namesLowerCaseWithoutSigns, namesWithoutSigns, namesLowerCase);
}
......@@ -608,11 +636,13 @@ public class WriteByMetabolitesID {
Set<BiodbMetaboliteNode> allMetabolites = service.listMetabolites();
// Set<BiodbMetaboliteNode> allMetabolites = new HashSet<>();
// allMetabolites.add(service.getMetabolite("C05359", MetaboliteMajorLabel.LigandCompound));
// allMetabolites.add(service.getMetabolite("glu-L", MetaboliteMajorLabel.BiGG));
// allMetabolites.add(service.getMetabolite("META:CU+2", MetaboliteMajorLabel.MetaCyc));
// allMetabolites.add(service.getMetabolite("cpd12713", MetaboliteMajorLabel.ModelSeed));
// allMetabolites.add(service.getMetabolite("glu__L", MetaboliteMajorLabel.BiGGMetabolite));
// allMetabolites.add(service.getMetabolite("cpd00009", MetaboliteMajorLabel.ModelSeed));
// allMetabolites.add(service.getMetabolite("C00009", MetaboliteMajorLabel.LigandCompound));
// allMetabolites.add(service.getMetabolite("pi", MetaboliteMajorLabel.BiGG));
// allMetabolites.add(service.getMetabolite("ECOLI:Phosphates", MetaboliteMajorLabel.MetaCyc));
// allMetabolites.add(service.getMetabolite("META:CPD-20124", MetaboliteMajorLabel.MetaCyc));
// allMetabolites.add(service.getMetabolite("cpd11715", MetaboliteMajorLabel.ModelSeed));
// allMetabolites.add(service.getMetabolite("pi", MetaboliteMajorLabel.BiGGMetabolite));
// Set<String> allDatabases = new HashSet<>();
......@@ -649,9 +679,21 @@ public class WriteByMetabolitesID {
// System.out.println(formula + "\t" + names);
names.remove("co2"); // to avoid errors with cobalt ions
names.remove("h");
names.remove("H");
if(names.contains("glucose") || names.contains("Glucose") || names.contains("d-glucose") || names.contains("D-glucose")) {
names.remove("sugar");
names.remove("D-Glucopyranose");
names.remove("d-glucopyranose");
}
for(String name : names) {
// if(name.equals("glucose"))
// System.out.println("found");
// System.out.println();
//
// System.out.println(name + " >>>>> " + formula);
......@@ -691,10 +733,6 @@ public class WriteByMetabolitesID {
for(String name : names) {
// if(name.equals("Cu2+"))
// System.out.println("coiso");
// System.out.println("name>>" + name);
if(!found) {
......@@ -712,6 +750,7 @@ public class WriteByMetabolitesID {
// System.out.println("alias>>> " + alias.get(name));
for(String name2 : alias.get(name)) {
if(compounds.containsKey(name2))
ids.putAll(compounds.get(name2));
}
......
package kbase;
import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import enumerators.TypeOfTransporter;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.TcNumberContainer;
public class ModelSEEDCompoundsFileReader {
/**
* Method to read JSON exceptions file.
*
* @return
*/
public static Map<String, Map<String, String>> readFile() {
JSONParser parser = new JSONParser();
Map<String, Map<String, String>> data = new HashMap<>();
try {
Object obj = parser.parse(new FileReader("C:\\Users\\Davide\\Desktop\\Compounds.json"));
JSONObject allObjects = (JSONObject) obj;
@SuppressWarnings("unchecked")
Set<String> keys = allObjects.keySet();
for(String met : keys) {
Map<String, String> metProperties = new HashMap<>();
JSONObject properties = (JSONObject) allObjects.get(met);
if(properties.get("abbreviation") != null)
metProperties.put("abbreviation", properties.get("abbreviation").toString());
if(properties.get("abstract_compound") != null)
metProperties.put("abstract_compound"