GitLab CI for Docker builds is enabled! You can enable it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 990e6ca8 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

refactoring names retriever from biosynth database + validation

parent c82612ac
...@@ -16,8 +16,16 @@ ...@@ -16,8 +16,16 @@
<!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> --> <!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> -->
<url>http://193.137.11.210/nexus/content/groups/public/</url> <url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository> </repository>
<repository>
<id>ebi-repo</id>
<name>ebi-repo</name>
<url>http://www.ebi.ac.uk/intact/maven/nexus/content/repositories/ebi-repo/</url>
</repository>
</repositories> </repositories>
<build> <build>
<plugins> <plugins>
<!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId>
...@@ -59,6 +67,16 @@ ...@@ -59,6 +67,16 @@
<groupId>org.apache.logging.log4j</groupId> <groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId> <artifactId>log4j-slf4j-impl</artifactId>
</exclusion> </exclusion>
<exclusion>
<groupId>
uk.ac.ebi.chebi.webapps.chebiWS.client
</groupId>
<artifactId>chebiWS-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.axis</groupId>
<artifactId>axis-saaj</artifactId>
</exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
...@@ -253,5 +271,12 @@ ...@@ -253,5 +271,12 @@
<version>1.2.3</version> <version>1.2.3</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.ebi.chebi.webapps.chebiWS.client/chebiWS-client -->
<dependency>
<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
<artifactId>chebiWS-client</artifactId>
<version>2.4</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
package APIs;
import java.util.List;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
/**
 * Static helper that queries the ChEBI web service to translate an external
 * cross-reference into a MetaCyc accession.
 */
public class ChebiAPI extends ChebiAPIInterface{

	/**
	 * Client shared by every lookup. Previously a second client was created
	 * inside the method and shadowed this field, so the field was never used.
	 * NOTE(review): assumes ChebiWebServiceClient can be reused across calls
	 * (and threads, if callers are concurrent) - confirm against the client docs.
	 */
	private static final ChebiWebServiceClient chebiClient = new ChebiWebServiceClient();

	/**
	 * Searches ChEBI for the given identifier among the manually curated
	 * cross-references (three-star entries only, at most one hit requested),
	 * then scans the database links of the matching entity for a
	 * "MetaCyc accession" entry.
	 *
	 * @param id external reference to search for
	 * @return the data of the first "MetaCyc accession" database link, or
	 *         {@code null} when no entity or no such link is found, or when
	 *         the web service call fails
	 */
	public static String getMetacycIDUsingExternalReference(String id) {

		try {
			LiteEntityList entities = chebiClient.getLiteEntity(id, SearchCategory.MANUAL_XREFS, 1, StarsCategory.THREE_ONLY);

			if(entities == null)
				return null;

			List<LiteEntity> resultList = entities.getListElement();

			if(resultList == null)
				return null;

			// As before, the identifier of the last returned hit is the one kept.
			String chebiID = null;

			for(LiteEntity liteEntity : resultList)
				chebiID = liteEntity.getChebiId();

			if(chebiID == null || chebiID.isEmpty())
				return null;

			Entity entity = chebiClient.getCompleteEntity(chebiID);

			if(entity == null)
				return null;

			List<DataItem> databaseLinks = entity.getDatabaseLinks();

			if(databaseLinks == null)
				return null;

			for(DataItem dataItem : databaseLinks) { // scan the cross-database links

				String type = dataItem.getType();

				// A link without a type is skipped instead of aborting the whole scan.
				if(type != null && type.trim().equalsIgnoreCase("MetaCyc accession"))
					return dataItem.getData();
			}
		}
		catch (Exception e1) {
			// Best-effort lookup: a failed service call is reported and treated as "not found".
			e1.printStackTrace();
		}

		return null;
	}
}
package utilities; package APIs;
import java.util.List; import java.util.List;
......
...@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; ...@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.FileSystemResource;
import files.FilesUtils; import files.FilesUtils;
import internalDB.FetchCompoundsByName;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity; import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing; import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing;
...@@ -164,18 +165,29 @@ public class ModelSEED { ...@@ -164,18 +165,29 @@ public class ModelSEED {
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
if(!line.isEmpty()) { if(!line.isEmpty() && !line.startsWith("MS ID")) {
Map<String, Set<String>> submap = new HashMap<>(); Map<String, Set<String>> submap = new HashMap<>();
String[] text = line.split("\t"); String[] text = line.split("\t");
String id = text[0].trim(); String id = text[0].trim();
String oldID;
String externalID = text[2]; String externalID = text[2];
String source = text[3].trim(); String source = text[3].trim();
if(text.length == 3) { if(text.length == 3)
externalID = text[1].trim(); externalID = text[1].trim();
else if (!text[1].trim().isEmpty()) {
oldID = text[1].trim();
Integer currentID = FetchCompoundsByName.getIDNumberFormat(id, MetaboliteMajorLabel.ModelSeed);
Integer previousID = FetchCompoundsByName.getIDNumberFormat(oldID, MetaboliteMajorLabel.ModelSeed);
if(previousID < currentID) {
id = oldID;
}
} }
MetaboliteMajorLabel label = null; MetaboliteMajorLabel label = null;
...@@ -230,6 +242,8 @@ public class ModelSEED { ...@@ -230,6 +242,8 @@ public class ModelSEED {
e.printStackTrace(); e.printStackTrace();
} }
System.out.println(data);
return data; return data;
} }
......
This diff is collapsed.
...@@ -42,6 +42,8 @@ public class IdentifyReactionsMetabolites { ...@@ -42,6 +42,8 @@ public class IdentifyReactionsMetabolites {
metabolites = getMetabolitesFromReactions(reactionsData).keySet(); metabolites = getMetabolitesFromReactions(reactionsData).keySet();
System.out.println(metabolites);
logger.info("Total metabolites for search: {}", metabolites.size()); logger.info("Total metabolites for search: {}", metabolites.size());
getMetabolitesIDs(namesAndIDsContainer, service); getMetabolitesIDs(namesAndIDsContainer, service);
...@@ -81,7 +83,6 @@ public class IdentifyReactionsMetabolites { ...@@ -81,7 +83,6 @@ public class IdentifyReactionsMetabolites {
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911 // System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry(); identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975 // System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
...@@ -109,14 +110,18 @@ public class IdentifyReactionsMetabolites { ...@@ -109,14 +110,18 @@ public class IdentifyReactionsMetabolites {
} }
/**
* @param metabolite
* @param ids
*/
private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) { private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) {
MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids); MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids);
if(id != null) { if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>(); Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc)) if(id.equals(MetaboliteMajorLabel.EcoCyc))
...@@ -321,8 +326,14 @@ public class IdentifyReactionsMetabolites { ...@@ -321,8 +326,14 @@ public class IdentifyReactionsMetabolites {
for(String metabolite : new HashSet<>(metabolites)) { for(String metabolite : new HashSet<>(metabolites)) {
// System.out.println(metabolite);
//
// System.out.println("map " + metabolite + "\t" + allMetabolitesByName.get(metabolite));
if(allMetabolitesByName.containsKey(metabolite)) { if(allMetabolitesByName.containsKey(metabolite)) {
// System.out.println("yes");
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite); Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite);
saveMetabolite(metabolite, ids); saveMetabolite(metabolite, ids);
} }
...@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites { ...@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites {
saveMetabolite(metabolite, ids); saveMetabolite(metabolite, ids);
} }
} }
if(metabolite.equalsIgnoreCase("maltooligosaccharides")) { //resolver isto
Map<MetaboliteMajorLabel, String> ids = new HashMap<>();
ids.put(MetaboliteMajorLabel.MetaCyc, "META:Malto-Oligosaccharides");
saveMetabolite(metabolite, ids);
}
} }
} }
...@@ -385,7 +404,12 @@ public class IdentifyReactionsMetabolites { ...@@ -385,7 +404,12 @@ public class IdentifyReactionsMetabolites {
for(String metab : metabs) { for(String metab : metabs) {
metab = metab.replaceAll("^(\\+\\s)", "").replaceAll("^(\\d+)", ""); if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", "");
}
metab = metab.replaceAll("^(\\+\\s)", "");
metabolites.add(metab.trim()); metabolites.add(metab.trim());
} }
...@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites { ...@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites {
String original = metabolite; String original = metabolite;
if(metabolite.matches("(?i).+(-P)$")) if(metabolite.matches("(?i).+(-P)$"))
metabolite = metabolite.replaceAll("(?i)(-P)$", "\\sphosphate"); metabolite = metabolite.replaceAll("(?i)(-P)$", "-phosphate");
metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate"); metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate");
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase()); String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(metabolite.equalsIgnoreCase("Arabinose"))
metabolite = "Arabinoses";
if(metabolite.equalsIgnoreCase("Fatty acyl-CoA"))
metabolite = "Acyl coenzyme A";
if(word != null) { if(word != null) {
if(standardNames.containsKey(word)) if(standardNames.containsKey(word))
standardNames.get(word).add(original); standardNames.get(word).add(original);
...@@ -576,6 +605,12 @@ public class IdentifyReactionsMetabolites { ...@@ -576,6 +605,12 @@ public class IdentifyReactionsMetabolites {
else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound)) else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound))
return MetaboliteMajorLabel.LigandCompound; return MetaboliteMajorLabel.LigandCompound;
else if(ids.containsKey(MetaboliteMajorLabel.BiGG))
return MetaboliteMajorLabel.BiGG;
else if(ids.containsKey(MetaboliteMajorLabel.BiGGMetabolite))
return MetaboliteMajorLabel.BiGGMetabolite;
else { //returns a 'random' key else { //returns a 'random' key
for(MetaboliteMajorLabel key : ids.keySet()) for(MetaboliteMajorLabel key : ids.keySet())
return key; return key;
......
...@@ -3,6 +3,7 @@ package reactions; ...@@ -3,6 +3,7 @@ package reactions;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
...@@ -22,11 +23,14 @@ import org.slf4j.Logger; ...@@ -22,11 +23,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import APIs.UniprotAPIExtension;
import biocomponents.OutputMerlinFormat; import biocomponents.OutputMerlinFormat;
import biocomponents.TriageSBMLLevel3Writer; import biocomponents.TriageSBMLLevel3Writer;
import blast.Blast; import blast.Blast;
import enumerators.MetaboliteReferenceDatabaseEnum; import enumerators.MetaboliteReferenceDatabaseEnum;
import files.FilesUtils; import files.FilesUtils;
import files.ReadExcelFile;
import files.WriteExcel;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import kbase.ModelSEEDCompoundsFileReader; import kbase.ModelSEEDCompoundsFileReader;
import kbase.Reports; import kbase.Reports;
...@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer; ...@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer;
import triageDatabase.TriageGeneralProperties; import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService; import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer; import triageDatabase.TriageNeo4jInitializer;
import triageDatabase.TriageNodeLabel;
import triageDatabase.TriageRelationshipType; import triageDatabase.TriageRelationshipType;
import utilities.UniprotAPIExtension;
import utilities.triage_utilities.Properties; import utilities.triage_utilities.Properties;
import utilities.triage_utilities.Utilities; import utilities.triage_utilities.Utilities;
...@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes { ...@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes {
taxonomies = new HashMap<>(); taxonomies = new HashMap<>();
organisms = new HashMap<>(); organisms = new HashMap<>();
tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>(); tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>();
reactionsToIgnore = new HashSet<>();
reactionContainersByID = new HashMap<>(); reactionContainersByID = new HashMap<>();
this.modelMetabolites = modelMetabolites; this.modelMetabolites = modelMetabolites;
...@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes {
findTaxonomyByTaxonomyID(taxID); findTaxonomyByTaxonomyID(taxID);
try { try {
Blast blast = new Blast(queryPath, properties); //replace the null by the query file path Blast blast = new Blast(true, queryPath, properties);
blastResults = blast.getAlignmentsByQuery(); blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize(); queryFileTotalOfGenes = blast.getQueryFileSize();
...@@ -130,7 +133,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -130,7 +133,7 @@ public class ProvideTransportReactionsToGenes {
while (n != 99) { while (n != 99) {
// tests(); // tests();
start(); //uncomment start(); //uncomment
...@@ -161,10 +164,18 @@ public class ProvideTransportReactionsToGenes { ...@@ -161,10 +164,18 @@ public class ProvideTransportReactionsToGenes {
private void tests() { private void tests() {
Node tcNumberNode = service.findTcNumberNode("3.A.1.2.3"); String tc = "3.D.1.1.1";
Node tcNumberNode = service.findTcNumberNode(tc);
System.out.println(tc);
System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
if(tcNumberNode != null) { if(tcNumberNode != null) {
System.out.println(tcNumberNode.getAllProperties());
Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction); Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
for(Relationship rel : relations) { for(Relationship rel : relations) {
...@@ -179,6 +190,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -179,6 +190,8 @@ public class ProvideTransportReactionsToGenes {
} }
} }
else
System.out.println("nulo");
} }
private void findTaxonomyByTaxonomyID(Integer taxID) { private void findTaxonomyByTaxonomyID(Integer taxID) {
...@@ -226,7 +239,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -226,7 +239,8 @@ public class ProvideTransportReactionsToGenes {
try { try {
tests(); reactionsToIgnore = new HashSet<>();
// tests();
logger.debug("Searching reactions..."); logger.debug("Searching reactions...");
...@@ -234,7 +248,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -234,7 +248,7 @@ public class ProvideTransportReactionsToGenes {
reactionsByTcNumber = getReactionsByTcNumber(identifyTcNumbersForSearch()); reactionsByTcNumber = getReactionsByTcNumber(identifyTcNumbersForSearch());
System.out.println(reactionsByTcNumber); // System.out.println(reactionsByTcNumber);
if(reactionContainersByID.keySet().size() == 0) if(reactionContainersByID.keySet().size() == 0)
logger.warn("No metabolites present in the model are available in the selected reactions!"); logger.warn("No metabolites present in the model are available in the selected reactions!");
...@@ -245,27 +259,27 @@ public class ProvideTransportReactionsToGenes { ...@@ -245,27 +259,27 @@ public class ProvideTransportReactionsToGenes {
String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\"; String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
// generateFinalResults(similaritiesResults); // generateFinalResults(similaritiesResults);
generateFinalResultsAuxValidation(similaritiesResults, path); generateFinalResultsAuxValidation(similaritiesResults, path);
System.out.println(finalResults); // System.out.println(finalResults);
logger.debug("Reactions search complete!"); logger.debug("Reactions search complete!");
// OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID); Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
// Container container = new Container(output); OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
// container.verifyDepBetweenClass();
// Container container = new Container(output);
// String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json"); container.verifyDepBetweenClass();
//
// TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false); String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
//
// sbml.writeToFile(); TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
//
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service); sbml.writeToFile();
//
// validation(container, reactionsIDS, path); validation(container, reactionsIDS, path);
} }
catch (Exception e) { catch (Exception e) {
...@@ -318,6 +332,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -318,6 +332,8 @@ public class ProvideTransportReactionsToGenes {
for(String queryAccession : resultsByEvalue.keySet()) { for(String queryAccession : resultsByEvalue.keySet()) {
// System.out.println(queryAccession);
Map<String, Set<String>> res = new HashMap<>(); Map<String, Set<String>> res = new HashMap<>();
for(String tcNumber : resultsByEvalue.get(queryAccession)) { for(String tcNumber : resultsByEvalue.get(queryAccession)) {
...@@ -338,8 +354,9 @@ public class ProvideTransportReactionsToGenes { ...@@ -338,8 +354,9 @@ public class ProvideTransportReactionsToGenes {
Map<String, String> locus = Map<String, String> locus =
FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt")); FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt"));
for(String key : finalResultsAux.keySet()) for(String key : finalResultsAux.keySet()) {
finalResults.put(locus.get(key), finalResultsAux.get(key)); finalResults.put(locus.get(key.split("\\s+")[0]), finalResultsAux.get(key));
}
} }
/** /**
...@@ -349,7 +366,6 @@ public class ProvideTransportReactionsToGenes { ...@@ -349,7 +366,6 @@ public class ProvideTransportReactionsToGenes {
private void validation(Container containerTriage, Map<String, String> reactionsIDS, String path) { private void validation(Container containerTriage, Map<String, String> reactionsIDS, String path) {