GitLab CI for Docker builds is enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 67bffede authored by Davide Lagoa
Browse files

bugs metacyc retriever fixed

parent b2e5cf95
...@@ -225,12 +225,19 @@ public class FetchCompoundsByName { ...@@ -225,12 +225,19 @@ public class FetchCompoundsByName {
for(BiodbMetaboliteNode node : allMetabolites) { for(BiodbMetaboliteNode node : allMetabolites) {
Map<MetaboliteMajorLabel, Set<Long>> counts = new HashMap<>(); Map<MetaboliteMajorLabel, Set<Long>> counts = new HashMap<>();
String entryID = node.getEntry();
if(!node.getEntry().isEmpty()) { if(!entryID.isEmpty()) {
Map<String, Object> nodeProperties = node.getAllProperties(); Map<String, Object> nodeProperties = node.getAllProperties();
Set<String> names = getSynonyms(node, nodeProperties, service); Set<String> names = getSynonyms(node, nodeProperties, service);
if(entryID.matches("META:.*"))
names.add(entryID);
for(String name : names) { for(String name : names) {
......
...@@ -95,7 +95,7 @@ public class WriteByMetabolitesID { ...@@ -95,7 +95,7 @@ public class WriteByMetabolitesID {
new PopulateTransytNeo4jDatabase(data, newData, properties); new PopulateTransytNeo4jDatabase(data, newData, properties);
} }
dataTx.failure(); dataTx.failure();
dataTx.close(); dataTx.close();
service.shutdown(); service.shutdown();
...@@ -127,17 +127,17 @@ public class WriteByMetabolitesID { ...@@ -127,17 +127,17 @@ public class WriteByMetabolitesID {
// } // }
// } // }
Node node = service.getNodeByEntryAndLabel("cpd15496", MetaboliteMajorLabel.ModelSeed); Node node = service.getNodeByEntryAndLabel("META:Glucuronides", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties()); System.out.println(node.getAllProperties());
Iterable<Relationship> rels = node.getRelationships(); // Iterable<Relationship> rels = node.getRelationships();
//
for(Relationship rel : rels) { // for(Relationship rel : rels) {
Long otherNode = rel.getOtherNodeId(node.getId()); // Long otherNode = rel.getOtherNodeId(node.getId());
//
System.out.println(service.getNodeById(otherNode).getAllProperties()); // System.out.println(service.getNodeById(otherNode).getAllProperties());
} // }
...@@ -155,47 +155,49 @@ public class WriteByMetabolitesID { ...@@ -155,47 +155,49 @@ public class WriteByMetabolitesID {
Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) { Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) {
try { try {
Boolean generate = true; Boolean generate = false;
String accession = "P77211"; String accession = "P16433";
// test2(service, null, null);
Retriever.runRetriever(true, true, accession); Retriever.runRetriever(true, true, accession);
if(generate) { if(generate) {
reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>(); reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
String[] accessions = new String[] {accession};
for(String acc : accessions) Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
reactionsData2.put(acc, reactionsData.get(acc)); String[] accessions = new String[] {accession};
/////TRANSYT for(String acc : accessions)
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(reactionsData2, service, data, namesAndIDsContainer, properties).getResults(); //uncomment reactionsData2.put(acc, reactionsData.get(acc));
/////TRANSYT
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(reactionsData2, service, data, namesAndIDsContainer, properties).getResults(); //uncomment
for(TcNumberContainer2 container : newData.get(accession)) {
System.out.println(container.getTcNumber()); for(TcNumberContainer2 container : newData.get(accession)) {
for( Integer id : container.getAllReactionsIds()) { System.out.println(container.getTcNumber());
System.out.println();
System.out.println(container.getReactionContainer(id).getReactionID()); for( Integer id : container.getAllReactionsIds()) {
System.out.println(container.getReactionContainer(id).getMetaReactionID()); System.out.println();
System.out.println(container.getReactionContainer(id).getCompartmentalizedReactionID());
System.out.println(container.getReactionContainer(id).getReaction());
System.out.println(container.getReactionContainer(id).getReactionBase());
System.out.println(container.getReactionContainer(id).getReactionKEGG());
System.out.println(container.getReactionContainer(id).getReactionBiGG());
System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
System.out.println(container.getReactionContainer(id).getReactionModelSEED());
System.out.println(container.getReactionContainer(id).getOriginalReaction());
}
System.out.println(container.getReactionContainer(id).getReactionID());
System.out.println(container.getReactionContainer(id).getMetaReactionID());
System.out.println(container.getReactionContainer(id).getCompartmentalizedReactionID());
System.out.println(container.getReactionContainer(id).getReaction());
System.out.println(container.getReactionContainer(id).getReactionBase());
System.out.println(container.getReactionContainer(id).getReactionKEGG());
System.out.println(container.getReactionContainer(id).getReactionBiGG());
System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
System.out.println(container.getReactionContainer(id).getReactionModelSEED());
System.out.println(container.getReactionContainer(id).getOriginalReaction());
}
System.out.println();
} System.out.println();
}
} }
// JSONFilesUtils.writeJSONTriageReactions(newData); // JSONFilesUtils.writeJSONTriageReactions(newData);
...@@ -236,13 +238,16 @@ public class WriteByMetabolitesID { ...@@ -236,13 +238,16 @@ public class WriteByMetabolitesID {
String entryID = node.getEntry(); String entryID = node.getEntry();
if(!entryID.isEmpty()) { if(!entryID.isEmpty()) {
Map<String, Object> nodeProperties = node.getAllProperties(); Map<String, Object> nodeProperties = node.getAllProperties();
BiosynthMetaboliteProperties bioSynMetab; BiosynthMetaboliteProperties bioSynMetab;
Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service); Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service);
if(entryID.matches("META:.*")) {
synonyms.add(entryID);
}
// String names =""; // String names ="";
// //
// if(node.hasProperty("name")) { // if(node.hasProperty("name")) {
...@@ -266,14 +271,22 @@ public class WriteByMetabolitesID { ...@@ -266,14 +271,22 @@ public class WriteByMetabolitesID {
bioSynMetab = new BiosynthMetaboliteProperties(entryID, nodeProperties.get("major_label").toString(), synonyms); bioSynMetab = new BiosynthMetaboliteProperties(entryID, nodeProperties.get("major_label").toString(), synonyms);
else else
bioSynMetab = new BiosynthMetaboliteProperties(entryID, BiosynthMetaboliteProperties.NONE, synonyms); bioSynMetab = new BiosynthMetaboliteProperties(entryID, BiosynthMetaboliteProperties.NONE, synonyms);
if(node.hasProperty("formula"))
bioSynMetab.setFormula((String) nodeProperties.get("formula"));
if(node.hasProperty("formula")) {
String formula = (String) nodeProperties.get("formula");
if(formula != null && !(formula.equalsIgnoreCase("none") || formula.equalsIgnoreCase("null")))
bioSynMetab.setFormula(formula);
}
if(entryID.equalsIgnoreCase("cpd03805")) if(entryID.equalsIgnoreCase("cpd03805"))
bioSynMetab.setFormula("C6H11O8P"); //there's an error in the database, it needs to be updated bioSynMetab.setFormula("C6H11O8P"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd15391")) else if(entryID.equalsIgnoreCase("cpd15391"))
bioSynMetab.setFormula("C11H17NO11P"); //there's an error in the database, it needs to be updated bioSynMetab.setFormula("C11H17NO11P"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd26871"))
bioSynMetab.setFormula("C4H6N2O3R2"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd28237"))
bioSynMetab.setFormula("C6H8N3O4R3"); //there's an error in the database, it needs to be updated
if(node.hasProperty("remark")) if(node.hasProperty("remark"))
bioSynMetab.setRemark((String) nodeProperties.get("remark")); bioSynMetab.setRemark((String) nodeProperties.get("remark"));
......
...@@ -10,6 +10,7 @@ import org.slf4j.Logger; ...@@ -10,6 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.transyt.service.containers.BiosynthMetabolites; import pt.uminho.ceb.biosystems.transyt.service.containers.BiosynthMetabolites;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.BiosynthMetaboliteProperties;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer; import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcNumberContainer2; import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcNumberContainer2;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcdbMetabolitesContainer; import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcdbMetabolitesContainer;
...@@ -30,17 +31,20 @@ public class IdentifyReactionsMetabolites { ...@@ -30,17 +31,20 @@ public class IdentifyReactionsMetabolites {
private Map<String, Set<String>> metabolites; private Map<String, Set<String>> metabolites;
private Map<String, String[]> forChildsSearch; private Map<String, String[]> forChildsSearch;
private Map<String, String> metabolitesByOriginalName = new HashMap<String, String>(); private Map<String, String> metabolitesByOriginalName = new HashMap<String, String>();
private Map<String, BiosynthMetaboliteProperties> allData;
private Synonyms dictionary; private Synonyms dictionary;
private static final Logger logger = LoggerFactory.getLogger(IdentifyReactionsMetabolites.class); private static final Logger logger = LoggerFactory.getLogger(IdentifyReactionsMetabolites.class);
public IdentifyReactionsMetabolites(Map<String, Set<TcNumberContainer2>> reactionsData, BiosynthMetabolites namesAndIDsContainer, BiodbGraphDatabaseService service) { public IdentifyReactionsMetabolites(Map<String, Set<TcNumberContainer2>> reactionsData, BiosynthMetabolites namesAndIDsContainer,
Map<String, BiosynthMetaboliteProperties> allData, BiodbGraphDatabaseService service) {
this.dictionary = new Synonyms(); this.dictionary = new Synonyms();
this.allData = allData;
metabolites = getMetabolitesFromReactions(reactionsData); metabolites = getMetabolitesFromReactions(reactionsData);
for(String metabolite : metabolites.keySet()) { for(String metabolite : metabolites.keySet()) {
for(String originalMetabolite : metabolites.get(metabolite)) { for(String originalMetabolite : metabolites.get(metabolite)) {
this.metabolitesByOriginalName.put(originalMetabolite, metabolite); this.metabolitesByOriginalName.put(originalMetabolite, metabolite);
...@@ -70,44 +74,50 @@ public class IdentifyReactionsMetabolites { ...@@ -70,44 +74,50 @@ public class IdentifyReactionsMetabolites {
// Set<String> forSearch = new HashSet<>(metabolites); // Set<String> forSearch = new HashSet<>(metabolites);
// metabolites = new HashSet<>(standardizationOfNames1(metabolitesFromFile, dictionary)); // metabolites = new HashSet<>(standardizationOfNames1(metabolitesFromFile, dictionary));
this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer); this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer);
this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs()); this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs());
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron")); // System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
// //
// System.out.println("metabolites >>>" + metabolites.size()); //2078 // System.out.println("metabolites >>>" + metabolites.size()); //2078
// //
// System.out.println("allmetabolites >>>" + namesAndIDsContainer.getMetabolitesIDs().size()); //154224 // System.out.println("allmetabolites >>>" + namesAndIDsContainer.getMetabolitesIDs().size()); //154224
boolean print = false;
// //
identificationByDirectMatch(); identificationByDirectMatch();
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911 if(print)
System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry(); identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975 if(print)
System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
identificationInLowerCase(); identificationInLowerCase();
// System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089 if(print)
System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089
if(print)
identificationReplacingNonAlphanumeric();
identificationReplacingNonAlphanumeric(); if(print)
System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
// System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
identificationIntroducingDandL(); identificationIntroducingDandL();
// System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103 if(print)
System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103
identificationReplacingNonAlphanumericAndInLowercase(); identificationReplacingNonAlphanumericAndInLowercase();
// System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109 if(print)
System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
// System.out.println(metabolites); // System.out.println(metabolites);
...@@ -125,18 +135,22 @@ public class IdentifyReactionsMetabolites { ...@@ -125,18 +135,22 @@ public class IdentifyReactionsMetabolites {
if(id != null) { if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>(); Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc)) if(id.equals(MetaboliteMajorLabel.EcoCyc))
map.put(ids.get(id), MetaboliteMajorLabel.MetaCyc); map.put(ids.get(id), MetaboliteMajorLabel.MetaCyc);
else else
map.put(ids.get(id), id); map.put(ids.get(id), id);
// if(metabolite.equalsIgnoreCase("quinol") || metabolite.equalsIgnoreCase("quinone"))
// System.out.println();
// if(allData.containsKey(ids.get(id)) && allData.get(ids.get(id)).getFormula() != null
// && !allData.get(ids.get(id)).getFormula().equalsIgnoreCase("NONE")) {
tcdbMetabolitesIDs.put(metabolite, map); tcdbMetabolitesIDs.put(metabolite, map);
metabolites.remove(metabolite);
metabolites.remove(metabolite); // }
} }
} }
...@@ -355,9 +369,9 @@ public class IdentifyReactionsMetabolites { ...@@ -355,9 +369,9 @@ public class IdentifyReactionsMetabolites {
private void identificationByDirectMatch() { private void identificationByDirectMatch() {
for(String metabolite : new HashSet<>(metabolites.keySet())) { for(String metabolite : new HashSet<>(metabolites.keySet())) {
if(metabolite.equals("light")) // if(metabolite.equals("light"))
System.out.println(); // System.out.println();
try { try {
if(metabolite.equalsIgnoreCase("galactonate")) if(metabolite.equalsIgnoreCase("galactonate"))
...@@ -422,7 +436,7 @@ public class IdentifyReactionsMetabolites { ...@@ -422,7 +436,7 @@ public class IdentifyReactionsMetabolites {
String[] reactions = new String[] {reactionContainer.getReaction(), String[] reactions = new String[] {reactionContainer.getReaction(),
reactionContainer.getOriginalReaction()}; reactionContainer.getOriginalReaction()};
for(String reaction : reactions) { for(String reaction : reactions) {
for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction, for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction,
dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) { dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) {
...@@ -455,7 +469,7 @@ public class IdentifyReactionsMetabolites { ...@@ -455,7 +469,7 @@ public class IdentifyReactionsMetabolites {
String[] metabs = reaction.split(" \\+ "); String[] metabs = reaction.split(" \\+ ");
for(String metab : metabs) { for(String metab : metabs) {
if(!metab.matches("^(\\d+-).+")) { if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", ""); metab = metab.replaceAll("^(\\d+)", "");
} }
...@@ -563,7 +577,7 @@ public class IdentifyReactionsMetabolites { ...@@ -563,7 +577,7 @@ public class IdentifyReactionsMetabolites {
} }
String originalWord = word; String originalWord = word;
if(word.matches("ic acids*")) if(word.matches("ic acids*"))
word = word.replaceAll("ic acids*", "ate"); word = word.replaceAll("ic acids*", "ate");
......
...@@ -56,7 +56,7 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties; ...@@ -56,7 +56,7 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class ProvideTransportReactionsToGenes { public class ProvideTransportReactionsToGenes {
private static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber"; public static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber";
private Map<String, List<AlignmentCapsule>> blastResults; private Map<String, List<AlignmentCapsule>> blastResults;
private RestNeo4jGraphDatabase service; private RestNeo4jGraphDatabase service;
...@@ -543,23 +543,16 @@ public class ProvideTransportReactionsToGenes { ...@@ -543,23 +543,16 @@ public class ProvideTransportReactionsToGenes {
private void generateFinalResultsAuxValidation(Map<String, Set<String>> similaritiesResults, private void generateFinalResultsAuxValidation(Map<String, Set<String>> similaritiesResults,
Map<String, GeneContainer> genesContainers) { Map<String, GeneContainer> genesContainers) {
// Map<String, Map<String, Set<String>>> finalResultsAux = new HashMap<>();
finalResults = new HashMap<>(); finalResults = new HashMap<>();
// resultsByEvalue = new HashMap<String, Set<String>>(); //DELETE ME!!!!!!!!!!!!!!1
// Map<String, String> locus = for(String queryAccession : blastResults.keySet()) {
// FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt"));
for(String queryAccession : resultsByEvalue.keySet()) {
if(queryAccession.equals("b0341"))
System.out.println();
Set<String> reactionsAlreadyAssigned = new HashSet<>(); Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>(); Set<String> accepted = new HashSet<>();
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
Map<String, Set<String>> res = new HashMap<>(); Map<String, Set<String>> res = new HashMap<>();
boolean save = false; boolean save = false;
...@@ -570,50 +563,54 @@ public class ProvideTransportReactionsToGenes { ...@@ -570,50 +563,54 @@ public class ProvideTransportReactionsToGenes {
// for(String tc : reactionsByTcNumberForAnnotation.keySet()) // for(String tc : reactionsByTcNumberForAnnotation.keySet())
// System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc)); // System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc));
for(String tcNumber : resultsByEvalue.get(queryAccession)) { if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
if(reactionsByTcNumberForAnnotation.containsKey(tcNumber) && tcNumber.contains(tcFamily)){ for(String tcNumber : resultsByEvalue.get(queryAccession)) {
Set<String> reactions = reactionsByTcNumberForAnnotation.get(tcNumber); if(reactionsByTcNumberForAnnotation.containsKey(tcNumber) && tcNumber.contains(tcFamily)){
// if(!reactions.isEmpty()) Set<String> reactions = reactionsByTcNumberForAnnotation.get(tcNumber);
// System.out.println(reactions);
// System.out.println("dddd " + reactions); // if(!reactions.isEmpty())
// System.out.println(reactions);
Iterator<String> iterator = reactions.iterator(); // System.out.println("dddd " + reactions);
while (iterator.hasNext()) { Iterator<String> iterator = reactions.iterator();
String id = iterator.next();