GitLab CI for Docker builds is enabled! You can turn it on by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 990e6ca8 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

refactoring names retriever from biosynth database + validation

parent c82612ac
......@@ -16,8 +16,16 @@
<!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> -->
<url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository>
<repository>
<id>ebi-repo</id>
<name>ebi-repo</name>
<url>http://www.ebi.ac.uk/intact/maven/nexus/content/repositories/ebi-repo/</url>
</repository>
</repositories>
<build>
<plugins>
<!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId>
......@@ -59,6 +67,16 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
<exclusion>
<groupId>
uk.ac.ebi.chebi.webapps.chebiWS.client
</groupId>
<artifactId>chebiWS-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.axis</groupId>
<artifactId>axis-saaj</artifactId>
</exclusion>
</exclusions>
</dependency>
......@@ -253,5 +271,12 @@
<version>1.2.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.ebi.chebi.webapps.chebiWS.client/chebiWS-client -->
<dependency>
<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
<artifactId>chebiWS-client</artifactId>
<version>2.4</version>
</dependency>
</dependencies>
</project>
package APIs;
import java.util.List;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
/**
 * Static helpers for querying the ChEBI web service.
 */
public class ChebiAPI extends ChebiAPIInterface{

	// Single client reused by every lookup; previously a second client was built on
	// each call and this field was never used.
	// NOTE(review): assumes the client may be reused across calls - confirm before
	// invoking this class from several threads.
	private static final ChebiWebServiceClient chebiClient = new ChebiWebServiceClient();

	/**
	 * Looks up a ChEBI entry through one of its manually curated cross references
	 * and returns the MetaCyc accession listed among that entry's database links.
	 *
	 * The search is restricted to {@code SearchCategory.MANUAL_XREFS} and
	 * {@code StarsCategory.THREE_ONLY}, and asks the service for a single hit.
	 *
	 * @param id external reference used as the search term
	 * @return the data of the first database link whose type is "MetaCyc accession"
	 *         (case-insensitive, surrounding whitespace ignored), or {@code null}
	 *         when {@code id} is null/empty, nothing matches, or the lookup fails
	 */
	public static String getMetacycIDUsingExternalReference(String id) {

		// Nothing to search for: skip the remote call altogether.
		if(id == null || id.isEmpty())
			return null;

		String identifier = null;

		try {
			LiteEntityList entities = chebiClient.getLiteEntity(id, SearchCategory.MANUAL_XREFS, 1, StarsCategory.THREE_ONLY);

			if(entities == null)
				return null;

			List<LiteEntity> resultList = entities.getListElement();

			if(resultList == null)
				return null;

			// Keeps the ChEBI identifier of the last returned hit.
			String chebiID = null;

			for(LiteEntity liteEntity : resultList) {
				if(liteEntity != null)
					chebiID = liteEntity.getChebiId();
			}

			if(chebiID == null || chebiID.isEmpty())
				return null;

			Entity entity = chebiClient.getCompleteEntity(chebiID);

			if(entity == null)
				return null;

			List<DataItem> databaseLinks = entity.getDatabaseLinks();

			if(databaseLinks == null)
				return null;

			// Scan the database links (not the synonyms) for the MetaCyc entry.
			for(DataItem dataItem : databaseLinks) {

				if(dataItem == null)
					continue;

				String type = dataItem.getType();

				// A link without a type must not abort the scan of the remaining links.
				if(type != null && type.trim().equalsIgnoreCase("MetaCyc accession")) {
					identifier = dataItem.getData();
					break;
				}
			}
		}
		catch (Exception e1) {
			// Best effort: a failed lookup is reported and treated as "not found".
			e1.printStackTrace();
		}

		return identifier;
	}
}
......@@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource;
import files.FilesUtils;
import internalDB.FetchCompoundsByName;
import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.GraphMetaboliteEntity;
import pt.uminho.sysbio.biosynth.integration.etl.CentralMetaboliteEtlDataCleansing;
......@@ -164,18 +165,29 @@ public class ModelSEED {
while ((line = reader.readLine()) != null) {
if(!line.isEmpty()) {
if(!line.isEmpty() && !line.startsWith("MS ID")) {
Map<String, Set<String>> submap = new HashMap<>();
String[] text = line.split("\t");
String id = text[0].trim();
String oldID;
String externalID = text[2];
String source = text[3].trim();
if(text.length == 3) {
if(text.length == 3)
externalID = text[1].trim();
else if (!text[1].trim().isEmpty()) {
oldID = text[1].trim();
Integer currentID = FetchCompoundsByName.getIDNumberFormat(id, MetaboliteMajorLabel.ModelSeed);
Integer previousID = FetchCompoundsByName.getIDNumberFormat(oldID, MetaboliteMajorLabel.ModelSeed);
if(previousID < currentID) {
id = oldID;
}
}
MetaboliteMajorLabel label = null;
......@@ -230,6 +242,8 @@ public class ModelSEED {
e.printStackTrace();
}
System.out.println(data);
return data;
}
......
This diff is collapsed.
......@@ -42,6 +42,8 @@ public class IdentifyReactionsMetabolites {
metabolites = getMetabolitesFromReactions(reactionsData).keySet();
System.out.println(metabolites);
logger.info("Total metabolites for search: {}", metabolites.size());
getMetabolitesIDs(namesAndIDsContainer, service);
......@@ -81,7 +83,6 @@ public class IdentifyReactionsMetabolites {
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
......@@ -109,14 +110,18 @@ public class IdentifyReactionsMetabolites {
}
/**
* @param metabolite
* @param ids
*/
private void saveMetabolite(String metabolite, Map<MetaboliteMajorLabel, String> ids) {
MetaboliteMajorLabel id = selectMetaboliteMajorLabel(metabolite, ids);
if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc))
......@@ -321,8 +326,14 @@ public class IdentifyReactionsMetabolites {
for(String metabolite : new HashSet<>(metabolites)) {
// System.out.println(metabolite);
//
// System.out.println("map " + metabolite + "\t" + allMetabolitesByName.get(metabolite));
if(allMetabolitesByName.containsKey(metabolite)) {
// System.out.println("yes");
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite);
saveMetabolite(metabolite, ids);
}
......@@ -335,6 +346,14 @@ public class IdentifyReactionsMetabolites {
saveMetabolite(metabolite, ids);
}
}
if(metabolite.equalsIgnoreCase("maltooligosaccharides")) { //resolver isto
Map<MetaboliteMajorLabel, String> ids = new HashMap<>();
ids.put(MetaboliteMajorLabel.MetaCyc, "META:Malto-Oligosaccharides");
saveMetabolite(metabolite, ids);
}
}
}
......@@ -385,7 +404,12 @@ public class IdentifyReactionsMetabolites {
for(String metab : metabs) {
metab = metab.replaceAll("^(\\+\\s)", "").replaceAll("^(\\d+)", "");
if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", "");
}
metab = metab.replaceAll("^(\\+\\s)", "");
metabolites.add(metab.trim());
}
......@@ -491,12 +515,17 @@ public class IdentifyReactionsMetabolites {
String original = metabolite;
if(metabolite.matches("(?i).+(-P)$"))
metabolite = metabolite.replaceAll("(?i)(-P)$", "\\sphosphate");
metabolite = metabolite.replaceAll("(?i)(-P)$", "-phosphate");
metabolite = metabolite.replaceAll("(?i)(ic acids*)", "ate");
String word = dictionary.getSynonym(metabolite.replaceAll("\\s+", "").toLowerCase());
if(metabolite.equalsIgnoreCase("Arabinose"))
metabolite = "Arabinoses";
if(metabolite.equalsIgnoreCase("Fatty acyl-CoA"))
metabolite = "Acyl coenzyme A";
if(word != null) {
if(standardNames.containsKey(word))
standardNames.get(word).add(original);
......@@ -576,6 +605,12 @@ public class IdentifyReactionsMetabolites {
else if(ids.containsKey(MetaboliteMajorLabel.LigandCompound))
return MetaboliteMajorLabel.LigandCompound;
else if(ids.containsKey(MetaboliteMajorLabel.BiGG))
return MetaboliteMajorLabel.BiGG;
else if(ids.containsKey(MetaboliteMajorLabel.BiGGMetabolite))
return MetaboliteMajorLabel.BiGGMetabolite;
else { //returns a 'random' key
for(MetaboliteMajorLabel key : ids.keySet())
return key;
......
......@@ -3,6 +3,7 @@ package reactions;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
......@@ -22,11 +23,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import APIs.UniprotAPIExtension;
import biocomponents.OutputMerlinFormat;
import biocomponents.TriageSBMLLevel3Writer;
import blast.Blast;
import enumerators.MetaboliteReferenceDatabaseEnum;
import files.FilesUtils;
import files.ReadExcelFile;
import files.WriteExcel;
import internalDB.WriteByMetabolitesID;
import kbase.ModelSEEDCompoundsFileReader;
import kbase.Reports;
......@@ -48,8 +52,8 @@ import tcdb.capsules.ReactionContainer;
import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer;
import triageDatabase.TriageNodeLabel;
import triageDatabase.TriageRelationshipType;
import utilities.UniprotAPIExtension;
import utilities.triage_utilities.Properties;
import utilities.triage_utilities.Utilities;
......@@ -95,7 +99,6 @@ public class ProvideTransportReactionsToGenes {
taxonomies = new HashMap<>();
organisms = new HashMap<>();
tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>();
reactionsToIgnore = new HashSet<>();
reactionContainersByID = new HashMap<>();
this.modelMetabolites = modelMetabolites;
......@@ -108,7 +111,7 @@ public class ProvideTransportReactionsToGenes {
findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(queryPath, properties); //replace the null by the query file path
Blast blast = new Blast(true, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize();
......@@ -130,7 +133,7 @@ public class ProvideTransportReactionsToGenes {
while (n != 99) {
// tests();
// tests();
start(); //uncomment
......@@ -161,10 +164,18 @@ public class ProvideTransportReactionsToGenes {
private void tests() {
Node tcNumberNode = service.findTcNumberNode("3.A.1.2.3");
String tc = "3.D.1.1.1";
Node tcNumberNode = service.findTcNumberNode(tc);
System.out.println(tc);
System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
if(tcNumberNode != null) {
System.out.println(tcNumberNode.getAllProperties());
Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
for(Relationship rel : relations) {
......@@ -179,6 +190,8 @@ public class ProvideTransportReactionsToGenes {
}
}
else
System.out.println("nulo");
}
private void findTaxonomyByTaxonomyID(Integer taxID) {
......@@ -226,7 +239,8 @@ public class ProvideTransportReactionsToGenes {
try {
tests();
reactionsToIgnore = new HashSet<>();
// tests();
logger.debug("Searching reactions...");
......@@ -234,7 +248,7 @@ public class ProvideTransportReactionsToGenes {
reactionsByTcNumber = getReactionsByTcNumber(identifyTcNumbersForSearch());
System.out.println(reactionsByTcNumber);
// System.out.println(reactionsByTcNumber);
if(reactionContainersByID.keySet().size() == 0)
logger.warn("No metabolites present in the model are available in the selected reactions!");
......@@ -245,27 +259,27 @@ public class ProvideTransportReactionsToGenes {
String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
// generateFinalResults(similaritiesResults);
// generateFinalResults(similaritiesResults);
generateFinalResultsAuxValidation(similaritiesResults, path);
System.out.println(finalResults);
// System.out.println(finalResults);
logger.debug("Reactions search complete!");
// OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
//
// Container container = new Container(output);
// container.verifyDepBetweenClass();
//
// String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
//
// TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
//
// sbml.writeToFile();
//
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
// validation(container, reactionsIDS, path);
Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
Container container = new Container(output);
container.verifyDepBetweenClass();
String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
sbml.writeToFile();
validation(container, reactionsIDS, path);
}
catch (Exception e) {
......@@ -318,6 +332,8 @@ public class ProvideTransportReactionsToGenes {
for(String queryAccession : resultsByEvalue.keySet()) {
// System.out.println(queryAccession);
Map<String, Set<String>> res = new HashMap<>();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
......@@ -338,8 +354,9 @@ public class ProvideTransportReactionsToGenes {
Map<String, String> locus =
FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt"));
for(String key : finalResultsAux.keySet())
finalResults.put(locus.get(key), finalResultsAux.get(key));
for(String key : finalResultsAux.keySet()) {
finalResults.put(locus.get(key.split("\\s+")[0]), finalResultsAux.get(key));
}
}
/**
......@@ -349,7 +366,6 @@ public class ProvideTransportReactionsToGenes {
private void validation(Container containerTriage, Map<String, String> reactionsIDS, String path) {
try {
Set<String> duplicate = new HashSet<>();
Map<String, String> missingReactions = new HashMap<>();
Map<String, String> missingReactionsWithNames = new HashMap<>();
......@@ -381,6 +397,8 @@ public class ProvideTransportReactionsToGenes {
Set<String> transporters = containerSBML.getReactionsByType(ReactionTypeEnum.Transport);
// containerSBML.getReaction("").hasSameStoichiometry(r, rev_in_account, ignoreCompartments) //comparison
Map<String, MetaboliteCI> metabolites = containerTriage.getMetabolites();
System.out.println("Validating...");
......@@ -520,9 +538,10 @@ public class ProvideTransportReactionsToGenes {
FilesUtils.saveMapInFile3(path + "geneRules.txt", geneRules);
generatedReactionsByTCNumber(path);
}
catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
......@@ -540,14 +559,16 @@ public class ProvideTransportReactionsToGenes {
for(String tcNumber : identifyTcNumbersForSearch) {
// System.out.println(tcNumber);
try {
if(!tcNumbersNotPresentInTRIAGEdatabase.contains(tcNumber)) {
Node tcNumberNode = service.findTcNumberNode(tcNumber);
System.out.println(tcNumber);
System.out.println(tcNumberNode == null);
// System.out.println(tcNumber);
// System.out.println(tcNumberNode == null);
if(tcNumberNode != null) {
......@@ -871,14 +892,20 @@ public class ProvideTransportReactionsToGenes {
String reactionID = node.getProperty(TriageGeneralProperties.ReactionID.toString()).toString();
if(reactionID.equals("TR_001303")) {
System.out.println("Ignorar > " + reactionsToIgnore.contains(reactionID));
// if(reactionID.equals("TR_001303")) {
// System.out.println("Ignorar > " + reactionsToIgnore.contains(reactionID));
//
// }
// System.out.println(reactionsToIgnore);
// System.out.println(reactionID);
// System.out.println(reactionsToIgnore.contains(reactionID));
// System.out.println(node.hasProperty(defaultLabel.toString()));
}
// System.out.println(reactionID);
if(!reactionsToIgnore.contains(reactionID) && node.hasProperty(defaultLabel.toString())) {
System.out.println("entrou");
// System.out.println("entrou");
boolean reversible = Boolean.valueOf(node.getProperty(TriageGeneralProperties.Reversible.toString()).toString());
......@@ -889,18 +916,29 @@ public class ProvideTransportReactionsToGenes {
else
regex = ReactionContainer.IRREV_TOKEN;
// System.out.println(node.getAllProperties());
String reaction = node.getProperty(defaultLabel.toString()).toString();
String reactionWithIDs = node.getProperty(TriageGeneralProperties.Reaction.toString()).toString();
// System.out.println(reactionWithIDs);
String reactionAux = reaction.replace(" (in)", "").replace(" (out)", "").replaceAll(regex, "+");
// System.out.println(reactionAux);
Set<String> compounds = Utilities.convertStringToSetString(reactionAux, "\\s+\\+\\s+");
if(reactionID.equals("TR_001303")) {
System.out.println("ToIgnore");
System.out.println("compounds > " + compounds);
System.out.println(modelMetabolites.keySet().containsAll(compounds));
}
// if(reactionID.equals("TR_001303")) {
// System.out.println("ToIgnore");
// System.out.println("compounds > " + compounds);
// System.out.println(modelMetabolites.keySet().containsAll(compounds));
// }
//
// System.out.println(compounds);
//
// for(String c : compounds)
// System.out.println(c + "\t" + modelMetabolites.containsKey(c));
// System.out.println("Contains all: " + modelMetabolites.keySet().containsAll(compounds));
//
// System.out.println();
if(modelMetabolites.keySet().containsAll(compounds)) {
......@@ -938,9 +976,50 @@ public class ProvideTransportReactionsToGenes {
// e.printStackTrace();
}
// System.out.println(set);
return set;
}
/**
 * Writes an Excel table listing, for every reaction present in the final
 * results, the reaction with its identifiers and the TC numbers it was
 * reached through.
 *
 * Each row has three cells: the reaction ID, the value of
 * {@code getReactionWithIDs()} of the matching reaction container, and the
 * TC numbers separated by ", ".
 *
 * @param path directory prefix; the file name "reactionsByTCNumber.xlsx" is appended to it
 */
private void generatedReactionsByTCNumber(String path) {

	// Invert finalResults (accession -> TC number -> reaction IDs) into reaction ID -> TC numbers.
	Map<String, Set<String>> tcNumbersByReaction = new HashMap<>();

	for(String accession : finalResults.keySet()) {

		for(String tcNumber : finalResults.get(accession).keySet()) {

			for(String reactionID : finalResults.get(accession).get(tcNumber)) {

				Set<String> tcNumbers = tcNumbersByReaction.get(reactionID);

				// Register the set once, on first sight of the reaction.
				if(tcNumbers == null) {
					tcNumbers = new HashSet<>();
					tcNumbersByReaction.put(reactionID, tcNumbers);
				}

				tcNumbers.add(tcNumber);
			}
		}
	}

	List<String[]> table = new ArrayList<>();

	for(Map.Entry<String, Set<String>> entry : tcNumbersByReaction.entrySet()) {

		String reactionID = entry.getKey();

		String[] line = new String[3];

		line[0] = reactionID;

		// A reaction without a registered container gets an empty cell instead of
		// aborting the whole report with a NullPointerException.
		line[1] = reactionContainersByID.get(reactionID) != null
				? reactionContainersByID.get(reactionID).getReactionWithIDs()
				: "";

		// Set.toString() yields "[a, b]"; strip the brackets literally (no regex needed).
		line[2] = entry.getValue().toString().replace("[", "").replace("]", "");

		table.add(line);
	}

	WriteExcel.tableToExcel(table, path + "reactionsByTCNumber.xlsx");
}
private TriageGeneralProperties getDefaultLabel() {
MetaboliteReferenceDatabaseEnum label = properties.getDefaultLabel();
......
......@@ -67,7 +67,7 @@ public class TransportReactionsBuilder {
this.generationsLimit = FilesUtils.readGenerationsLimitFile("C:\\Users\\Davide\\Documents\\reactionsBuilderTriage\\ChildsLimits.txt");
mappingModelSeed = MappingMetabolites.mappingMetabolites(MetaboliteMajorLabel.ModelSeed);