GitLab CI for Docker builds is enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit de067005 authored by Davide Lagoa
Browse files

TC family identifier | search by taxonomy fixed

parent 990e6ca8
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -5,7 +5,10 @@ import org.neo4j.graphdb.GraphDatabaseService;
import edu.uminho.biosynth.core.data.integration.neo4j.HelperNeo4jConfigInitializer;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.EtlTransform;
import pt.uminho.sysbio.biosynthframework.biodb.bigg.Bigg2MetaboliteEntity;
import pt.uminho.sysbio.biosynthframework.biodb.bigg.BiggMetaboliteEntity;
import pt.uminho.sysbio.biosynthframework.biodb.biocyc.BioCycMetaboliteEntity;
import pt.uminho.sysbio.biosynthframework.biodb.seed.ModelSeedMetaboliteEntity;
import pt.uminho.sysbio.biosynthframework.io.MetaboliteDao;
public class BuildDatabase {
......@@ -15,91 +18,91 @@ public class BuildDatabase {
System.out.println("setting GraphDatabaseService...");
GraphDatabaseService graphDatabaseService = HelperNeo4jConfigInitializer
.initializeNeo4jDataDatabaseConstraints("C:\\Users\\Davide\\Documents\\BASE DE DADOS BIOSYNTH\\db3_0");
// ///////////////////////////////////////////////////////////
// System.out.println();
// System.out.println("requesting kegg compounds...");
//
// KEGG.etl(KEGG.getKeggCompoundMetaboliteDao() , KEGG.getKeggCompoundTransform(), graphDatabaseService);
//
// System.out.println("requesting kegg glycans...");
//
// KEGG.etl(KEGG.getKeggGlycanMetaboliteDao() , KEGG.getKeggGlycanTransform(), graphDatabaseService);
//
// ///////////////////////////////////////////////////////////
//
// System.out.println();
// System.out.println("opening BIGG1...");
//
// EtlTransform<BiggMetaboliteEntity, GraphMetaboliteEntity> t1 = BIGG1.getBiggMetaboliteTransform();
//
// MetaboliteDao<BiggMetaboliteEntity> daoBigg1 = BIGG1.getBigg1MetaboliteDatabase();
//
// System.out.println("transforming...");
//
// BIGG1.etl(daoBigg1, t1, graphDatabaseService);
//
// System.out.println("BIGG1 complete...");
//
// ///////////////////////////////////////////////////////////
// System.out.println();
// System.out.println("opening BIGG2...");
//
// EtlTransform<Bigg2MetaboliteEntity, GraphMetaboliteEntity> t2 = BIGG2.getBIGG2MetaboliteTransform();
// MetaboliteDao<Bigg2MetaboliteEntity> daoBigg2 = BIGG2.getBigg2MetaboliteDao();
//
// System.out.println("transforming...");
//
// BIGG2.etl(daoBigg2 , t2, graphDatabaseService);
//
// System.out.println("BIGG2 complete...");
//
// ///////////////////////////////////////////////////////////
// System.out.println();
// System.out.println("opening Metacyc META...");
//
// EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t3 = Metacyc.getMetacycMetaboliteTransform();
//// MetaboliteDao<BiggMetaboliteEntity> daoBigg = getBigg1MetaboliteDatabase();
// MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc3 = Metacyc.getMetacycMetaboliteDao("META");
//
// System.out.println("transforming...");
//
// Metacyc.etl(daoBiocyc3 , t3, graphDatabaseService);
//
// System.out.println("Metacyc META complete...");
//
// ///////////////////////////////////////////////////////////
// System.out.println();
// System.out.println("opening Metacyc ECOLI...");
//
// EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t4 = Metacyc.getMetacycMetaboliteTransform();
// MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc4 = Metacyc.getMetacycMetaboliteDao("ECOLI");
//
// System.out.println("transforming...");
//
// Metacyc.etl(daoBiocyc4 , t4, graphDatabaseService);
//
// System.out.println("Metacyc ECOLI complete...");
.initializeNeo4jDataDatabaseConstraints("C:\\Users\\Davide\\Documents\\BASE DE DADOS BIOSYNTH\\db5_0");
///////////////////////////////////////////////////////////
System.out.println();
System.out.println("requesting kegg compounds...");
KEGG.etl(KEGG.getKeggCompoundMetaboliteDao() , KEGG.getKeggCompoundTransform(), graphDatabaseService);
System.out.println("requesting kegg glycans...");
KEGG.etl(KEGG.getKeggGlycanMetaboliteDao() , KEGG.getKeggGlycanTransform(), graphDatabaseService);
///////////////////////////////////////////////////////////
System.out.println();
System.out.println("opening BIGG1...");
EtlTransform<BiggMetaboliteEntity, GraphMetaboliteEntity> t1 = BIGG1.getBiggMetaboliteTransform();
MetaboliteDao<BiggMetaboliteEntity> daoBigg1 = BIGG1.getBigg1MetaboliteDatabase();
System.out.println("transforming...");
BIGG1.etl(daoBigg1, t1, graphDatabaseService);
System.out.println("BIGG1 complete...");
///////////////////////////////////////////////////////////
System.out.println();
System.out.println("opening BIGG2...");
EtlTransform<Bigg2MetaboliteEntity, GraphMetaboliteEntity> t2 = BIGG2.getBIGG2MetaboliteTransform();
MetaboliteDao<Bigg2MetaboliteEntity> daoBigg2 = BIGG2.getBigg2MetaboliteDao();
System.out.println("transforming...");
BIGG2.etl(daoBigg2 , t2, graphDatabaseService);
System.out.println("BIGG2 complete...");
///////////////////////////////////////////////////////////
System.out.println();
System.out.println("opening Metacyc META...");
EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t3 = Metacyc.getMetacycMetaboliteTransform();
// MetaboliteDao<BiggMetaboliteEntity> daoBigg = getBigg1MetaboliteDatabase();
MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc3 = Metacyc.getMetacycMetaboliteDao("META");
System.out.println("transforming...");
Metacyc.etl(daoBiocyc3 , t3, graphDatabaseService);
System.out.println("Metacyc META complete...");
///////////////////////////////////////////////////////////
System.out.println();
System.out.println("opening Metacyc ECOLI...");
EtlTransform<BioCycMetaboliteEntity, GraphMetaboliteEntity> t4 = Metacyc.getMetacycMetaboliteTransform();
MetaboliteDao<BioCycMetaboliteEntity> daoBiocyc4 = Metacyc.getMetacycMetaboliteDao("ECOLI");
System.out.println("transforming...");
Metacyc.etl(daoBiocyc4 , t4, graphDatabaseService);
System.out.println("Metacyc ECOLI complete...");
//////////////////////////////////////////////
// System.out.println("opening ModelSEED...");
//
// EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t0 = ModelSEED.getMetaboliteTransform();
//
// MetaboliteDao<ModelSeedMetaboliteEntity> dao0 = ModelSEED.getMetaboliteDatabase();
//
// System.out.println("transforming...");
//
// ModelSEED.etl(dao0 , t0, graphDatabaseService);
//
// System.out.println("ModelSEED complete...");
//
System.out.println("opening ModelSEED...");
EtlTransform<ModelSeedMetaboliteEntity, GraphMetaboliteEntity> t0 = ModelSEED.getMetaboliteTransform();
MetaboliteDao<ModelSeedMetaboliteEntity> dao0 = ModelSEED.getMetaboliteDatabase();
System.out.println("transforming...");
ModelSEED.etl(dao0 , t0, graphDatabaseService);
System.out.println("ModelSEED complete...");
ModelSEED.createModelSEEDRelationships(graphDatabaseService);
// System.out.println("database load complete...");
//
System.out.println("database load complete...");
graphDatabaseService.shutdown();
System.out.println("shutdown...");
......
......@@ -84,7 +84,7 @@ public class KEGG {
HbmNeo4jHybridMetaboliteEtlPipeline<M, GraphMetaboliteEntity> etlPipeline =
new HbmNeo4jHybridMetaboliteEtlPipeline<>();
// etlPipeline.exclude.add("D05511");
etlPipeline.exclude.add("D05511");
etlPipeline.setSkipLoad(false);
etlPipeline.setGraphDatabaseService(graphDatabaseService);
etlPipeline.setSessionFactory(null);
......
......@@ -193,7 +193,7 @@ public class ModelSEED {
MetaboliteMajorLabel label = null;
if(source.equalsIgnoreCase("BiGG1"))
label = MetaboliteMajorLabel.BiGG2;
label = MetaboliteMajorLabel.BiGG;
else if(source.equalsIgnoreCase("KEGG")) {
label = MetaboliteMajorLabel.LigandCompound;
......@@ -208,7 +208,7 @@ public class ModelSEED {
}
if(source.equalsIgnoreCase("BiGG"))
label = MetaboliteMajorLabel.BiGG;
label = MetaboliteMajorLabel.BiGGMetabolite;
if(label != null && !id.isEmpty() && !externalID.isEmpty()) {
......
......@@ -36,6 +36,7 @@ import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
import utilities.triage_utilities.Properties;
/**
* @author Davide
......@@ -48,6 +49,8 @@ public class WriteByMetabolitesID {
public static void main(String[] args) {
try {
Properties properties = new Properties();
GraphDatabaseService graphDatabaseService = initializeNeo4jdb.getDataDatabase(null);
Transaction dataTx = graphDatabaseService.beginTx();
......@@ -93,7 +96,7 @@ public class WriteByMetabolitesID {
// new FetchCompoundsByName(service, false).getResults();
test(namesAndIDsContainer, data, service, null, reactionsData, null); //uncomment
test(namesAndIDsContainer, data, service, null, reactionsData, null, properties); //uncomment
// test2(service);
......@@ -169,19 +172,19 @@ public class WriteByMetabolitesID {
public static void test(BiosynthMetabolites namesAndIDsContainer, Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service, Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs,
Map<String, Set<TcNumberContainer2>> reactionsData, Map<String, String[]> forChildsSearch) {
Map<String, Set<TcNumberContainer2>> reactionsData, Map<String, String[]> forChildsSearch, Properties properties) {
// new FetchCompoundsByName(service, false).getResults();
try {
Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
// Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
reactionsData2.put("P02916", reactionsData.get("P02916"));
// reactionsData2.put("P0A927", reactionsData.get("P0A927"));
// reactionsData2.put("P0AEE5", reactionsData.get("P0AEE5"));
// reactionsData2.put("P23200", reactionsData.get("P23200"));
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData2, namesAndIDsContainer, service);
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData, namesAndIDsContainer, service);
tcdbMetabolitesIDs = metabolitesIdentification.getTcdbMetabolitesIDs();
......@@ -191,27 +194,32 @@ public class WriteByMetabolitesID {
// reactionsData3.put("O51235", reactionsData.get("O51235"));
System.out.println(tcdbMetabolitesIDs);
// System.out.println(tcdbMetabolitesIDs);
/////TRIAGE
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData2, forChildsSearch, service, data, namesAndIDsContainer.getMetabolitesIDs()).getResults(); //uncomment
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data, namesAndIDsContainer.getMetabolitesIDs(), properties).getResults(); //uncomment
// System.out.println(service.getNodeById(Long.valueOf(65729)).getAllProperties());
for(TcNumberContainer2 container : newData.get("P02916")) {
System.out.println(container.getTcNumber());
for( Integer id : container.getAllReactionsIds()) {
System.out.println();
System.out.println(container.getReactionContainer(id).getReaction());
System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
System.out.println(container.getReactionContainer(id).getReactionModelSEED());
}
System.out.println();
}
// for(TcNumberContainer2 container : newData.get("P0A927")) {
//
// System.out.println(container.getTcNumber());
//
// for( Integer id : container.getAllReactionsIds()) {
// System.out.println();
//
// System.out.println(container.getReactionContainer(id).getReactionID());
// System.out.println(container.getReactionContainer(id).getReaction());
// System.out.println(container.getReactionContainer(id).getReactionBase());
// System.out.println(container.getReactionContainer(id).getReactionKEGG());
// System.out.println(container.getReactionContainer(id).getReactionBiGG());
// System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
// System.out.println(container.getReactionContainer(id).getReactionModelSEED());
// }
//
//
// System.out.println();
// }
......
......@@ -110,7 +110,7 @@ public class Reports {
JSONObject obj2 = new JSONObject();
String tcFamily = getFamilyForAnnotation(container);
String tcFamily = "";
if(!tcFamily.equals("none"))
totalGenesAnnotated++;
......@@ -211,7 +211,7 @@ public class Reports {
* @param container
* @return
*/
private static String getFamilyForAnnotation(Map<String, Set<String>> container) {
public static String getFamilyForAnnotation(Map<String, Set<String>> container) {
Map<String, Integer> counts = new HashMap<>();
int maxCount = 0;
......@@ -221,7 +221,12 @@ public class Reports {
String[] split = tc.split("\\.");
tc = split[0].concat(".").concat(split[1]).concat(".").concat(split[2]).concat(".");
try {
tc = split[0].concat(".").concat(split[1]).concat(".").concat(split[2]).concat(".");
}
catch (Exception e) {
System.out.println("ERRO FAMILIA " + tc);
}
if(counts.containsKey(tc)) {
......@@ -241,7 +246,7 @@ public class Reports {
}
}
return tcFamily.replaceAll("\\.$", "");
return tcFamily;
}
}
......@@ -326,6 +326,9 @@ public class IdentifyReactionsMetabolites {
for(String metabolite : new HashSet<>(metabolites)) {
if(metabolite.equalsIgnoreCase("galactonate"))
metabolite = "l-galactonate";
// System.out.println(metabolite);
//
// System.out.println("map " + metabolite + "\t" + allMetabolitesByName.get(metabolite));
......@@ -593,15 +596,15 @@ public class IdentifyReactionsMetabolites {
forChildsSearch.put(metabolite, entry);
}
if(ids.containsKey(MetaboliteMajorLabel.MetaCyc))
if(ids.containsKey(MetaboliteMajorLabel.ModelSeed))
return MetaboliteMajorLabel.ModelSeed;
else if(ids.containsKey(MetaboliteMajorLabel.MetaCyc))
return MetaboliteMajorLabel.MetaCyc;
else if(ids.containsKey(MetaboliteMajorLabel.EcoCyc))
return MetaboliteMajorLabel.EcoCyc;
if(ids.containsKey(MetaboliteMajorLabel.ModelSeed))
return MetaboliteMajorLabel.ModelSeed;
else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound))
return MetaboliteMajorLabel.LigandCompound;
......
......@@ -7,6 +7,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
......@@ -49,6 +50,7 @@ import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLRead
import pt.uminho.ceb.biosystems.mew.biocomponents.validation.io.JSBMLValidationException;
import tcdb.capsules.GeneContainer;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.Subunits;
import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer;
......@@ -87,6 +89,7 @@ public class ProvideTransportReactionsToGenes {
private Map<String, String> modelMetabolites;
private TriageGeneralProperties defaultLabel;
private Subunits subunits;
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
......@@ -106,12 +109,13 @@ public class ProvideTransportReactionsToGenes {
this.taxID = taxID;
this.jobIdentification = new File(queryPath).getName().replace(".faa", "");
subunits = new Subunits();
defaultLabel = getDefaultLabel();
findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(true, queryPath, properties);
Blast blast = new Blast(false, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize();
......@@ -133,7 +137,7 @@ public class ProvideTransportReactionsToGenes {
while (n != 99) {
// tests();
// tests();
start(); //uncomment
......@@ -163,35 +167,70 @@ public class ProvideTransportReactionsToGenes {
}
private void tests() {
String tc = "3.D.1.1.1";
Node tcNumberNode = service.findTcNumberNode(tc);
System.out.println(tc);
System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
if(tcNumberNode != null) {
System.out.println(tcNumberNode.getAllProperties());
Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
for(Relationship rel : relations) {
System.out.println(rel.getEndNode().getAllProperties());
System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty("Reaction"));
if(rel.getEndNode().hasProperty(TriageGeneralProperties.ReactionModelSEED.toString()))
System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty(TriageGeneralProperties.ReactionModelSEED.toString()));
try {
Set<Node> list = service.getAllNodesByLabel(TriageNodeLabel.Reaction);
Iterator<Node> iterator = list.iterator();
Set<String> metabolites = new HashSet<>();
while(iterator.hasNext()) {
Node node = iterator.next();
String reaction = node.getProperty(TriageGeneralProperties.ReactionBase.toString()).toString();
boolean reversible = Boolean.valueOf(node.getProperty(TriageGeneralProperties.Reversible.toString()).toString());
String regex = ReactionContainer.IRREV_TOKEN;
if(reversible)
regex = ReactionContainer.REV_TOKEN;
String[] compounds = reaction.replaceAll(regex, " + ").split("\\s+\\+\\s+");
for(String c : compounds)
metabolites.add(c.replaceAll("=ModelSeed", "").replaceAll("\\(in\\)", "").replaceAll("\\(out\\)", "").trim());
}
System.out.println(metabolites.size());
}
catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
else
System.out.println("nulo");
// String tc = "3.D.1.1.1";
//
// Node tcNumberNode = service.findTcNumberNode(tc);
//
// System.out.println(tc);
//
// System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
//
// if(tcNumberNode != null) {
//
// System.out.println(tcNumberNode.getAllProperties());
//
// Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
//
// for(Relationship rel : relations) {
//
// System.out.println(rel.getEndNode().getAllProperties());
// System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty("Reaction"));
//
// if(rel.getEndNode().hasProperty(TriageGeneralProperties.ReactionModelSEED.toString()))
//
// System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty(TriageGeneralProperties.ReactionModelSEED.toString()));
//
// }
//
// }
// else
// System.out.println("nulo");
}
private void findTaxonomyByTaxonomyID(Integer taxID) {
......@@ -244,40 +283,52 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Searching reactions...");
resultsByEvalue = ReactionsPredictor.getReactionsForGenesByEvalue(blastResults, properties.geteValueThreshold());
resultsByEvalue = getReactionsForGenesByEvalue();
Set<String> tcNumbers = identifyTcNumbersForSearch();
reactionsByTcNumber = getReactionsByTcNumber(identifyTcNumbersForSearch());
this.subunits.setSubunitsDatabase(findSubunitsInDatabase(tcNumbers));
reactionsByTcNumber = getReactionsByTcNumber(tcNumbers);
// System.out.println(reactionsByTcNumber);
if(reactionContainersByID.keySet().size() == 0)
logger.warn("No metabolites present in the model are available in the selected reactions!");
Map<String, GeneContainer> data = buildGenesContainers();
Map<String, GeneContainer> genesContainers = buildGenesContainers();
Map<String, Map<String, Set<String>>> similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(data, reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore);
Map<String, Set<String>> similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(genesContainers, reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore);
String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
// generateFinalResults(similaritiesResults);
generateFinalResultsAuxValidation(similaritiesResults, path);
generateFinalResultsAuxValidation(similaritiesResults, genesContainers, path);
// System.out.println(finalResults);
logger.debug("Reactions search complete!");
// Map<String, String> reactionsIDS = null;
Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
System.out.println("1");
OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
System.out.println("2");
Container container = new Container(output);
container.verifyDepBetweenClass();
String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
System.out.println("3");
// String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
// TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
sbml.writeToFile();