GitLab CI for Docker builds is available! You can enable it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 990e6ca8 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

refactoring names retriever from biosynth database + validation

parent c82612ac
...@@ -16,8 +16,16 @@ ...@@ -16,8 +16,16 @@
<!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> --> <!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> -->
<url>http://193.137.11.210/nexus/content/groups/public/</url> <url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository> </repository>
<repository>
<id>ebi-repo</id>
<name>ebi-repo</name>
<url>http://www.ebi.ac.uk/intact/maven/nexus/content/repositories/ebi-repo/</url>
</repository>
</repositories> </repositories>
<build> <build>
<plugins> <plugins>
<!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId>
...@@ -59,6 +67,16 @@ ...@@ -59,6 +67,16 @@
<groupId>org.apache.logging.log4j</groupId> <groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId> <artifactId>log4j-slf4j-impl</artifactId>
</exclusion> </exclusion>
<exclusion>
<groupId>
uk.ac.ebi.chebi.webapps.chebiWS.client
</groupId>
<artifactId>chebiWS-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.axis</groupId>
<artifactId>axis-saaj</artifactId>
</exclusion>
</exclusions> </exclusions>
</dependency> </dependency>
...@@ -253,5 +271,12 @@ ...@@ -253,5 +271,12 @@
<version>1.2.3</version> <version>1.2.3</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.ebi.chebi.webapps.chebiWS.client/chebiWS-client -->
<dependency>
<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
<artifactId>chebiWS-client</artifactId>
<version>2.4</version>
</dependency>
</dependencies> </dependencies>
</project> </project>
package APIs;
import java.util.List;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
/**
 * Static helpers that query the ChEBI web service to translate an external
 * cross-reference into a MetaCyc accession.
 */
public class ChebiAPI extends ChebiAPIInterface {

    /** Database-link type under which ChEBI lists the MetaCyc accession of an entry. */
    private static final String METACYC_LINK_TYPE = "MetaCyc accession";

    /**
     * Client shared by every lookup, so it is built once at class load instead
     * of once per call.
     * NOTE(review): assumes the generated client tolerates reuse across calls;
     * confirm before invoking this class from multiple threads.
     */
    private static final ChebiWebServiceClient chebiClient = new ChebiWebServiceClient();

    /**
     * Resolves the MetaCyc accession of the ChEBI entry matching an external reference.
     *
     * <p>The reference is searched in the manually curated cross-references of
     * three-star ChEBI entries, asking for at most one hit. The complete entry
     * of that hit is then fetched and its database links are scanned for the
     * first one typed "MetaCyc accession" (case-insensitive).
     *
     * @param id external identifier to search for among the ChEBI cross-references
     * @return the MetaCyc accession, or {@code null} when there is no hit, the
     *         entry carries no MetaCyc link, or the web service call fails
     */
    public static String getMetacycIDUsingExternalReference(String id) {

        String identifier = null;

        try {
            LiteEntityList entities = chebiClient.getLiteEntity(id, SearchCategory.MANUAL_XREFS, 1, StarsCategory.THREE_ONLY);

            if (entities == null || entities.getListElement() == null) {
                return null;
            }

            // At most one result was requested; if more ever come back the last one wins.
            String chebiID = "";
            for (LiteEntity liteEntity : entities.getListElement()) {
                chebiID = liteEntity.getChebiId();
            }

            if (chebiID == null || chebiID.isEmpty()) {
                return null;
            }

            Entity entity = chebiClient.getCompleteEntity(chebiID);

            if (entity == null || entity.getDatabaseLinks() == null) {
                return null;
            }

            // Scan the database links of the entry for its MetaCyc accession.
            for (DataItem dataItem : entity.getDatabaseLinks()) {

                String type = dataItem.getType();

                // A link without a type must not abort the scan of the remaining links.
                if (type != null && METACYC_LINK_TYPE.equalsIgnoreCase(type.trim())) {
                    identifier = dataItem.getData();
                    break;
                }
            }
        }
        catch (Exception e1) {
            // Best-effort lookup: a failing service call is reported and yields null.
            e1.printStackTrace();
        }

        return identifier;
    }
}
package utilities; package APIs;
import java.util.List; import java.util.List;
......
...@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory; ...@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource; import org.springframework.core.io.FileSystemResource;
import files.FilesUtils; import files.FilesUtils;
import internalDB.FetchCompoundsByName;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity; import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing; import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing;
...@@ -164,18 +165,29 @@ public class ModelSEED { ...@@ -164,18 +165,29 @@ public class ModelSEED {
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
if(!line.isEmpty()) { if(!line.isEmpty() && !line.startsWith("MS ID")) {
Map<String, Set<String>> submap = new HashMap<>(); Map<String, Set<String>> submap = new HashMap<>();
String[] text = line.split("\t"); String[] text = line.split("\t");
String id = text[0].trim(); String id = text[0].trim();
String oldID;
String externalID = text[2]; String externalID = text[2];
String source = text[3].trim(); String source = text[3].trim();
if(text.length == 3) { if(text.length == 3)
externalID = text[1].trim(); externalID = text[1].trim();
else if (!text[1].trim().isEmpty()) {
oldID = text[1].trim();
Integer currentID = FetchCompoundsByName.getIDNumberFormat(id, MetaboliteMajorLabel.ModelSeed);
Integer previousID = FetchCompoundsByName.getIDNumberFormat(oldID, MetaboliteMajorLabel.ModelSeed);
if(previousID < currentID) {
id = oldID;
}
} }
MetaboliteMajorLabel label = null; MetaboliteMajorLabel label = null;
...@@ -229,6 +241,8 @@ public class ModelSEED { ...@@ -229,6 +241,8 @@ public class ModelSEED {
catch (IOException e) { catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
System.out.println(data);
return data; return data;
} }
......
This diff is collapsed.
...@@ -41,7 +41,9 @@ public class IdentifyReactionsMetabolites { ...@@ -41,7 +41,9 @@ public class IdentifyReactionsMetabolites {
// Set<String> tcdbMetabolites = getMetabolitesFromReactions(reactionsData); // Set<String> tcdbMetabolites = getMetabolitesFromReactions(reactionsData);
metabolites = getMetabolitesFromReactions(reactionsData).keySet(); metabolites = getMetabolitesFromReactions(reactionsData).keySet();
System.out.println(metabolites);
logger.info("Total metabolites for search: {}", metabolites.size()); logger.info("Total metabolites for search: {}", metabolites.size());
getMetabolitesIDs(namesAndIDsContainer, service); getMetabolitesIDs(namesAndIDsContainer, service);
...@@ -80,8 +82,7 @@ public class IdentifyReactionsMetabolites { ...@@ -80,8 +82,7 @@ public class IdentifyReactionsMetabolites {
identificationByDirectMatch(); identificationByDirectMatch();
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911 // System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry(); identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975 // System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
...@@ -103,20 +104,24 @@ public class IdentifyReactionsMetabolites { ...@@ -103,20 +104,24 @@ public class IdentifyReactionsMetabolites {
identificationReplacingNonAlphanumericAndInLowercase(); identificationReplacingNonAlphanumericAndInLowercase();
// System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109 // System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
return tcdbMetabolitesIDs; return tcdbMetabolitesIDs;
} }
/**
* @param metabolite
* @param ids
*/
private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) { private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) {
MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids); MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids);
if(id != null) { if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>(); Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc)) if(id.equals(MetaboliteMajorLabel.EcoCyc))
...@@ -136,7 +141,7 @@ public class IdentifyReactionsMetabolites { ...@@ -136,7 +141,7 @@ public class IdentifyReactionsMetabolites {
private void identificationIntroducingDandL() { private void identificationIntroducingDandL() {
for(String metabolite : new HashSet<>(metabolites)) { for(String metabolite : new HashSet<>(metabolites)) {
if(metabolite.matches("^(D*L*-+).+")){ if(metabolite.matches("^(D*L*-+).+")){
if(namesAndIDsContainer.getNamesWithoutSigns().containsKey(metabolite.replaceAll("^(D*L*-+)", ""))) { if(namesAndIDsContainer.getNamesWithoutSigns().containsKey(metabolite.replaceAll("^(D*L*-+)", ""))) {
...@@ -219,7 +224,7 @@ public class IdentifyReactionsMetabolites { ...@@ -219,7 +224,7 @@ public class IdentifyReactionsMetabolites {
private void identificationInLowerCase() { private void identificationInLowerCase() {
for(String metabolite : new HashSet<>(metabolites)) { for(String metabolite : new HashSet<>(metabolites)) {
if(namesAndIDsContainer.getNamesLowerCase().containsKey(metabolite.toLowerCase())) { if(namesAndIDsContainer.getNamesLowerCase().containsKey(metabolite.toLowerCase())) {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.toLowerCase()); Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.toLowerCase());
...@@ -320,9 +325,15 @@ public class IdentifyReactionsMetabolites { ...@@ -320,9 +325,15 @@ public class IdentifyReactionsMetabolites {
private void identificationByDirectMatch() { private void identificationByDirectMatch() {
for(String metabolite : new HashSet<>(metabolites)) { for(String metabolite : new HashSet<>(metabolites)) {
// System.out.println(metabolite);
//
// System.out.println("map " + metabolite + "\t" + allMetabolitesByName.get(metabolite));
if(allMetabolitesByName.containsKey(metabolite)) { if(allMetabolitesByName.containsKey(metabolite)) {
// System.out.println("yes");
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite); Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite);
saveMetabolite(metabolite, ids); saveMetabolite(metabolite, ids);
} }
...@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites { ...@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites {
saveMetabolite(metabolite, ids); saveMetabolite(metabolite, ids);
} }
} }
if(metabolite.equalsIgnoreCase("maltooligosaccharides")) { //resolver isto
Map<MetaboliteMajorLabel, String> ids = new HashMap<>();
ids.put(MetaboliteMajorLabel.MetaCyc, "META:Malto-Oligosaccharides");
saveMetabolite(metabolite, ids);
}
} }
} }
...@@ -350,11 +369,11 @@ public class IdentifyReactionsMetabolites { ...@@ -350,11 +369,11 @@ public class IdentifyReactionsMetabolites {
Map<String, Set<String>> metabolites = new HashMap<>(); Map<String, Set<String>> metabolites = new HashMap<>();
for(String accession : data.keySet()) { for(String accession : data.keySet()) {
for(TcNumberContainer2 tcContainer : data.get(accession)) { for(TcNumberContainer2 tcContainer : data.get(accession)) {
for(int id : tcContainer.getAllReactionsIds()) { for(int id : tcContainer.getAllReactionsIds()) {
ReactionContainer reactionContainer = tcContainer.getReactionContainer(id); ReactionContainer reactionContainer = tcContainer.getReactionContainer(id);
String reaction = reactionContainer.getReaction(); String reaction = reactionContainer.getReaction();
...@@ -384,8 +403,13 @@ public class IdentifyReactionsMetabolites { ...@@ -384,8 +403,13 @@ public class IdentifyReactionsMetabolites {
String[] metabs = reaction.split(" \\+ "); String[] metabs = reaction.split(" \\+ ");
for(String metab : metabs) { for(String metab : metabs) {
if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", "");
}
metab = metab.replaceAll("^(\\+\\s)", "").replaceAll("^(\\d+)", ""); metab = metab.replaceAll("^(\\+\\s)", "");
metabolites.add(metab.trim()); metabolites.add(metab.trim());
} }
...@@ -429,9 +453,9 @@ public class IdentifyReactionsMetabolites { ...@@ -429,9 +453,9 @@ public class IdentifyReactionsMetabolites {
private static BiosynthMetabolites standardizationOfNames2(BiosynthMetabolites namesAndIDsContainer) { private static BiosynthMetabolites standardizationOfNames2(BiosynthMetabolites namesAndIDsContainer) {
Synonyms dictionary = new Synonyms(); Synonyms dictionary = new Synonyms();
for(String metabolite : new HashSet<>(namesAndIDsContainer.getMetabolitesIDs().keySet())) { for(String metabolite : new HashSet<>(namesAndIDsContainer.getMetabolitesIDs().keySet())) {
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase()); String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(word != null) { if(word != null) {
...@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites { ...@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites {
String original = metabolite; String original = metabolite;
if(metabolite.matches("(?i).+(-P)$")) if(metabolite.matches("(?i).+(-P)$"))
metabolite = metabolite.replaceAll("(?i)(-P)$", "\\sphosphate"); metabolite = metabolite.replaceAll("(?i)(-P)$", "-phosphate");
metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate"); metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate");
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase()); String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(metabolite.equalsIgnoreCase("Arabinose"))
metabolite = "Arabinoses";
if(metabolite.equalsIgnoreCase("Fatty acyl-CoA"))
metabolite = "Acyl coenzyme A";
if(word != null) { if(word != null) {
if(standardNames.containsKey(word)) if(standardNames.containsKey(word))
standardNames.get(word).add(original); standardNames.get(word).add(original);
...@@ -575,6 +604,12 @@ public class IdentifyReactionsMetabolites { ...@@ -575,6 +604,12 @@ public class IdentifyReactionsMetabolites {
else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound)) else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound))
return MetaboliteMajorLabel.LigandCompound; return MetaboliteMajorLabel.LigandCompound;
else if(ids.containsKey(MetaboliteMajorLabel.BiGG))
return MetaboliteMajorLabel.BiGG;
else if(ids.containsKey(MetaboliteMajorLabel.BiGGMetabolite))
return MetaboliteMajorLabel.BiGGMetabolite;
else { //returns a 'random' key else { //returns a 'random' key
for(MetaboliteMajorLabel key : ids.keySet()) for(MetaboliteMajorLabel key : ids.keySet())
......
...@@ -3,6 +3,7 @@ package reactions; ...@@ -3,6 +3,7 @@ package reactions;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
...@@ -22,11 +23,14 @@ import org.slf4j.Logger; ...@@ -22,11 +23,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import APIs.UniprotAPIExtension;
import biocomponents.OutputMerlinFormat; import biocomponents.OutputMerlinFormat;
import biocomponents.TriageSBMLLevel3Writer; import biocomponents.TriageSBMLLevel3Writer;
import blast.Blast; import blast.Blast;
import enumerators.MetaboliteReferenceDatabaseEnum; import enumerators.MetaboliteReferenceDatabaseEnum;
import files.FilesUtils; import files.FilesUtils;
import files.ReadExcelFile;
import files.WriteExcel;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import kbase.ModelSEEDCompoundsFileReader; import kbase.ModelSEEDCompoundsFileReader;
import kbase.Reports; import kbase.Reports;
...@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer; ...@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer;
import triageDatabase.TriageGeneralProperties; import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService; import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer; import triageDatabase.TriageNeo4jInitializer;
import triageDatabase.TriageNodeLabel;
import triageDatabase.TriageRelationshipType; import triageDatabase.TriageRelationshipType;
import utilities.UniprotAPIExtension;
import utilities.triage_utilities.Properties; import utilities.triage_utilities.Properties;
import utilities.triage_utilities.Utilities; import utilities.triage_utilities.Utilities;
...@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes { ...@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes {
taxonomies = new HashMap<>(); taxonomies = new HashMap<>();
organisms = new HashMap<>(); organisms = new HashMap<>();
tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>(); tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>();
reactionsToIgnore = new HashSet<>();
reactionContainersByID = new HashMap<>(); reactionContainersByID = new HashMap<>();
this.modelMetabolites = modelMetabolites; this.modelMetabolites = modelMetabolites;
...@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes {
findTaxonomyByTaxonomyID(taxID); findTaxonomyByTaxonomyID(taxID);
try { try {
Blast blast = new Blast(queryPath, properties); //replace the null by the query file path Blast blast = new Blast(true, queryPath, properties);
blastResults = blast.getAlignmentsByQuery(); blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize(); queryFileTotalOfGenes = blast.getQueryFileSize();
...@@ -121,30 +124,30 @@ public class ProvideTransportReactionsToGenes { ...@@ -121,30 +124,30 @@ public class ProvideTransportReactionsToGenes {
Transaction dataTx = graphDatabaseService.beginTx(); Transaction dataTx = graphDatabaseService.beginTx();
////////////////////////// //////////////////////////
@SuppressWarnings("resource") @SuppressWarnings("resource")
Scanner reader = new Scanner(System.in); Scanner reader = new Scanner(System.in);
int n = 1; int n = 1;
while (n != 99) { while (n != 99) {
// tests(); // tests();
start(); //uncomment start(); //uncomment
System.out.println("Insert a number to repeat or 99 to finish"); System.out.println("Insert a number to repeat or 99 to finish");
try { try {
n = reader.nextInt(); n = reader.nextInt();
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
n = reader.nextInt(); n = reader.nextInt();
} }
} }
///////////////////////// /////////////////////////
dataTx.close(); dataTx.close();
service.shutdown(); service.shutdown();
...@@ -158,27 +161,37 @@ public class ProvideTransportReactionsToGenes { ...@@ -158,27 +161,37 @@ public class ProvideTransportReactionsToGenes {
e.printStackTrace(); e.printStackTrace();
} }
} }
private void tests() { private void tests() {
Node tcNumberNode = service.findTcNumberNode("3.A.1.2.3"); String tc = "3.D.1.1.1";
Node tcNumberNode = service.findTcNumberNode(tc);