GitLab CI for Docker builds is enabled! You can turn it on for your project with a .gitlab-ci.yml file. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 990e6ca8 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

refactoring names retriever from biosynth database + validation

parent c82612ac
......@@ -16,8 +16,16 @@
<!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> -->
<url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository>
<repository>
<id>ebi-repo</id>
<name>ebi-repo</name>
<url>http://www.ebi.ac.uk/intact/maven/nexus/content/repositories/ebi-repo/</url>
</repository>
</repositories>
<build>
<plugins>
<!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId>
......@@ -59,6 +67,16 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
<exclusion>
<groupId>
uk.ac.ebi.chebi.webapps.chebiWS.client
</groupId>
<artifactId>chebiWS-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.axis</groupId>
<artifactId>axis-saaj</artifactId>
</exclusion>
</exclusions>
</dependency>
......@@ -253,5 +271,12 @@
<version>1.2.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.ebi.chebi.webapps.chebiWS.client/chebiWS-client -->
<dependency>
<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
<artifactId>chebiWS-client</artifactId>
<version>2.4</version>
</dependency>
</dependencies>
</project>
package APIs;
import java.util.List;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
/**
 * Static helper around the ChEBI web-service client that resolves an external
 * reference into the MetaCyc accession listed among a ChEBI entity's database links.
 */
public class ChebiAPI extends ChebiAPIInterface{

	/**
	 * Client shared by every lookup. Previously this field was shadowed by a
	 * per-call local instance and therefore never used.
	 * NOTE(review): confirm ChebiWebServiceClient may be shared across threads
	 * if this method is ever called concurrently.
	 */
	static private final ChebiWebServiceClient chebiClient = new ChebiWebServiceClient();

	/**
	 * Searches ChEBI for the given external reference and returns the MetaCyc
	 * accession attached to the matching entity.
	 *
	 * The search is performed over {@code SearchCategory.MANUAL_XREFS}, asks for at
	 * most one result and is limited to {@code StarsCategory.THREE_ONLY}. When the
	 * service returns several entries, the last one is used (same as before).
	 *
	 * @param id external reference to search for; {@code null} or blank yields {@code null}
	 * @return the data of the first database link whose type is "MetaCyc accession"
	 *         (case-insensitive, trimmed), or {@code null} when nothing is found or
	 *         the web service fails
	 */
	public static String getMetacycIDUsingExternalReference(String id) {

		// Nothing to search for: avoid a pointless remote call.
		if(id == null || id.trim().isEmpty())
			return null;

		String identifier = null;

		try {
			LiteEntityList entities = chebiClient.getLiteEntity(id, SearchCategory.MANUAL_XREFS, 1, StarsCategory.THREE_ONLY);

			String chebiID = null;

			if(entities != null && entities.getListElement() != null) {

				// Last entry wins, mirroring the original behaviour.
				for (LiteEntity liteEntity : entities.getListElement())
					chebiID = liteEntity.getChebiId();
			}

			if(chebiID != null && !chebiID.isEmpty()) {

				Entity entity = chebiClient.getCompleteEntity(chebiID);

				if(entity != null && entity.getDatabaseLinks() != null) {

					// Walk the cross-references until the MetaCyc one is found.
					for (DataItem dataItem : entity.getDatabaseLinks()) {

						String type = dataItem.getType();

						if(type != null && type.trim().equalsIgnoreCase("MetaCyc accession")) {
							identifier = dataItem.getData();
							break;
						}
					}
				}
			}
		}
		catch (Exception e1) {
			// Best effort: a failed remote lookup is reported and treated as "not found".
			e1.printStackTrace();
		}

		return identifier;
	}
}
......@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource;
import files.FilesUtils;
import internalDB.FetchCompoundsByName;
import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing;
......@@ -164,18 +165,29 @@ public class ModelSEED {
while ((line = reader.readLine()) != null) {
if(!line.isEmpty()) {
if(!line.isEmpty() && !line.startsWith("MS ID")) {
Map<String, Set<String>> submap = new HashMap<>();
String[] text = line.split("\t");
String id = text[0].trim();
String oldID;
String externalID = text[2];
String source = text[3].trim();
if(text.length == 3) {
if(text.length == 3)
externalID = text[1].trim();
else if (!text[1].trim().isEmpty()) {
oldID = text[1].trim();
Integer currentID = FetchCompoundsByName.getIDNumberFormat(id, MetaboliteMajorLabel.ModelSeed);
Integer previousID = FetchCompoundsByName.getIDNumberFormat(oldID, MetaboliteMajorLabel.ModelSeed);
if(previousID < currentID) {
id = oldID;
}
}
MetaboliteMajorLabel label = null;
......@@ -229,6 +241,8 @@ public class ModelSEED {
catch (IOException e) {
e.printStackTrace();
}
System.out.println(data);
return data;
}
......
This diff is collapsed.
......@@ -41,7 +41,9 @@ public class IdentifyReactionsMetabolites {
// Set<String> tcdbMetabolites = getMetabolitesFromReactions(reactionsData);
metabolites = getMetabolitesFromReactions(reactionsData).keySet();
System.out.println(metabolites);
logger.info("Total metabolites for search: {}", metabolites.size());
getMetabolitesIDs(namesAndIDsContainer, service);
......@@ -80,8 +82,7 @@ public class IdentifyReactionsMetabolites {
identificationByDirectMatch();
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
......@@ -103,20 +104,24 @@ public class IdentifyReactionsMetabolites {
identificationReplacingNonAlphanumericAndInLowercase();
// System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
return tcdbMetabolitesIDs;
}
/**
* @param metabolite
* @param ids
*/
private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) {
MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids);
if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc))
......@@ -136,7 +141,7 @@ public class IdentifyReactionsMetabolites {
private void identificationIntroducingDandL() {
for(String metabolite : new HashSet<>(metabolites)) {
if(metabolite.matches("^(D*L*-+).+")){
if(namesAndIDsContainer.getNamesWithoutSigns().containsKey(metabolite.replaceAll("^(D*L*-+)", ""))) {
......@@ -219,7 +224,7 @@ public class IdentifyReactionsMetabolites {
private void identificationInLowerCase() {
for(String metabolite : new HashSet<>(metabolites)) {
if(namesAndIDsContainer.getNamesLowerCase().containsKey(metabolite.toLowerCase())) {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.toLowerCase());
......@@ -320,9 +325,15 @@ public class IdentifyReactionsMetabolites {
private void identificationByDirectMatch() {
for(String metabolite : new HashSet<>(metabolites)) {
// System.out.println(metabolite);
//
// System.out.println("map " + metabolite + "\t" + allMetabolitesByName.get(metabolite));
if(allMetabolitesByName.containsKey(metabolite)) {
// System.out.println("yes");
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite);
saveMetabolite(metabolite, ids);
}
......@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites {
saveMetabolite(metabolite, ids);
}
}
if(metabolite.equalsIgnoreCase("maltooligosaccharides")) { //resolver isto
Map<MetaboliteMajorLabel, String> ids = new HashMap<>();
ids.put(MetaboliteMajorLabel.MetaCyc, "META:Malto-Oligosaccharides");
saveMetabolite(metabolite, ids);
}
}
}
......@@ -350,11 +369,11 @@ public class IdentifyReactionsMetabolites {
Map<String, Set<String>> metabolites = new HashMap<>();
for(String accession : data.keySet()) {
for(TcNumberContainer2 tcContainer : data.get(accession)) {
for(int id : tcContainer.getAllReactionsIds()) {
ReactionContainer reactionContainer = tcContainer.getReactionContainer(id);
String reaction = reactionContainer.getReaction();
......@@ -384,8 +403,13 @@ public class IdentifyReactionsMetabolites {
String[] metabs = reaction.split(" \\+ ");
for(String metab : metabs) {
if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", "");
}
metab = metab.replaceAll("^(\\+\\s)", "").replaceAll("^(\\d+)", "");
metab = metab.replaceAll("^(\\+\\s)", "");
metabolites.add(metab.trim());
}
......@@ -429,9 +453,9 @@ public class IdentifyReactionsMetabolites {
private static BiosynthMetabolites standardizationOfNames2(BiosynthMetabolites namesAndIDsContainer) {
Synonyms dictionary = new Synonyms();
for(String metabolite : new HashSet<>(namesAndIDsContainer.getMetabolitesIDs().keySet())) {
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(word != null) {
......@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites {
String original = metabolite;
if(metabolite.matches("(?i).+(-P)$"))
metabolite = metabolite.replaceAll("(?i)(-P)$", "\\sphosphate");
metabolite = metabolite.replaceAll("(?i)(-P)$", "-phosphate");
metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate");
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(metabolite.equalsIgnoreCase("Arabinose"))
metabolite = "Arabinoses";
if(metabolite.equalsIgnoreCase("Fatty acyl-CoA"))
metabolite = "Acyl coenzyme A";
if(word != null) {
if(standardNames.containsKey(word))
standardNames.get(word).add(original);
......@@ -575,6 +604,12 @@ public class IdentifyReactionsMetabolites {
else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound))
return MetaboliteMajorLabel.LigandCompound;
else if(ids.containsKey(MetaboliteMajorLabel.BiGG))
return MetaboliteMajorLabel.BiGG;
else if(ids.containsKey(MetaboliteMajorLabel.BiGGMetabolite))
return MetaboliteMajorLabel.BiGGMetabolite;
else { //returns a 'random' key
for(MetaboliteMajorLabel key : ids.keySet())
......
......@@ -3,6 +3,7 @@ package reactions;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
......@@ -22,11 +23,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import APIs.UniprotAPIExtension;
import biocomponents.OutputMerlinFormat;
import biocomponents.TriageSBMLLevel3Writer;
import blast.Blast;
import enumerators.MetaboliteReferenceDatabaseEnum;
import files.FilesUtils;
import files.ReadExcelFile;
import files.WriteExcel;
import internalDB.WriteByMetabolitesID;
import kbase.ModelSEEDCompoundsFileReader;
import kbase.Reports;
......@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer;
import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer;
import triageDatabase.TriageNodeLabel;
import triageDatabase.TriageRelationshipType;
import utilities.UniprotAPIExtension;
import utilities.triage_utilities.Properties;
import utilities.triage_utilities.Utilities;
......@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes {
taxonomies = new HashMap<>();
organisms = new HashMap<>();
tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>();
reactionsToIgnore = new HashSet<>();
reactionContainersByID = new HashMap<>();
this.modelMetabolites = modelMetabolites;
......@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes {
findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(queryPath, properties); //replace the null by the query file path
Blast blast = new Blast(true, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize();
......@@ -121,30 +124,30 @@ public class ProvideTransportReactionsToGenes {
Transaction dataTx = graphDatabaseService.beginTx();
//////////////////////////
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
int n = 1;
while (n != 99) {
// tests();
start(); //uncomment
System.out.println("Insert a number to repeat or 99 to finish");
try {
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
}
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
int n = 1;
while (n != 99) {
// tests();
start(); //uncomment
System.out.println("Insert a number to repeat or 99 to finish");
try {
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
}
/////////////////////////
dataTx.close();
service.shutdown();
......@@ -158,27 +161,37 @@ public class ProvideTransportReactionsToGenes {
e.printStackTrace();
}
}
private void tests() {
Node tcNumberNode = service.findTcNumberNode("3.A.1.2.3");
String tc = "3.D.1.1.1";
Node tcNumberNode = service.findTcNumberNode(tc);
System.out.println(tc);
System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
if(tcNumberNode != null) {
System.out.println(tcNumberNode.getAllProperties());
Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
for(Relationship rel : relations) {
System.out.println(rel.getEndNode().getAllProperties());
System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty("Reaction"));
if(rel.getEndNode().hasProperty(TriageGeneralProperties.ReactionModelSEED.toString()))
System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty(TriageGeneralProperties.ReactionModelSEED.toString()));
System.out.println(rel.getEndNode().getProperty("ReactionID") + "\t********\t" + rel.getEndNode().getProperty(TriageGeneralProperties.ReactionModelSEED.toString()));
}
}
else
System.out.println("nulo");
}
private void findTaxonomyByTaxonomyID(Integer taxID) {
......@@ -225,16 +238,17 @@ public class ProvideTransportReactionsToGenes {
private void start() {
try {
tests();
reactionsToIgnore = new HashSet<>();
// tests();
logger.debug("Searching reactions...");
resultsByEvalue = ReactionsPredictor.getReactionsForGenesByEvalue(blastResults, properties.geteValueThreshold());
reactionsByTcNumber = getReactionsByTcNumber(identifyTcNumbersForSearch());
System.out.println(reactionsByTcNumber);
// System.out.println(reactionsByTcNumber);
if(reactionContainersByID.keySet().size() == 0)
logger.warn("No metabolites present in the model are available in the selected reactions!");
......@@ -244,28 +258,28 @@ public class ProvideTransportReactionsToGenes {
Map<String, Map<String, Set<String>>> similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(data, reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore);
String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
// generateFinalResults(similaritiesResults);
// generateFinalResults(similaritiesResults);
generateFinalResultsAuxValidation(similaritiesResults, path);
System.out.println(finalResults);
// System.out.println(finalResults);
logger.debug("Reactions search complete!");
// OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
//
// Container container = new Container(output);
// container.verifyDepBetweenClass();
//
// String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
//
// TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
//
// sbml.writeToFile();
//
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
// validation(container, reactionsIDS, path);
Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
Container container = new Container(output);
container.verifyDepBetweenClass();
String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
sbml.writeToFile();
validation(container, reactionsIDS, path);
}
catch (Exception e) {
......@@ -280,7 +294,7 @@ public class ProvideTransportReactionsToGenes {
* @param similaritiesResults
*/
private void generateFinalResults(Map<String, Map<String, Set<String>>> similaritiesResults) {
finalResults = new HashMap<>(similaritiesResults);
for(String queryAccession : resultsByEvalue.keySet()) {
......@@ -302,22 +316,24 @@ public class ProvideTransportReactionsToGenes {
finalResults.put(queryAccession, res);
}
}
/**
* Merge the results of both methods
*
* @param similaritiesResults
*/
private void generateFinalResultsAuxValidation(Map<String, Map<String, Set<String>>> similaritiesResults, String path) {
Map<String, Map<String, Set<String>>> finalResultsAux = new HashMap<>(similaritiesResults);
finalResultsAux = new HashMap<>(similaritiesResults);
finalResults = new HashMap<>();
for(String queryAccession : resultsByEvalue.keySet()) {
// System.out.println(queryAccession);
Map<String, Set<String>> res = new HashMap<>();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
......@@ -334,12 +350,13 @@ public class ProvideTransportReactionsToGenes {
finalResultsAux.put(queryAccession, res);
}
Map<String, String> locus =