GitLab CI for Docker builds is enabled! You can turn it on by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 67bffede authored by Davide Lagoa
Browse files

Fixed bugs in the MetaCyc retriever

parent b2e5cf95
......@@ -225,12 +225,19 @@ public class FetchCompoundsByName {
for(BiodbMetaboliteNode node : allMetabolites) {
Map<MetaboliteMajorLabel, Set<Long>> counts = new HashMap<>();
String entryID = node.getEntry();
if(!node.getEntry().isEmpty()) {
if(!entryID.isEmpty()) {
Map<String, Object> nodeProperties = node.getAllProperties();
Set<String> names = getSynonyms(node, nodeProperties, service);
if(entryID.matches("META:.*"))
names.add(entryID);
for(String name : names) {
......
......@@ -95,7 +95,7 @@ public class WriteByMetabolitesID {
new PopulateTransytNeo4jDatabase(data, newData, properties);
}
dataTx.failure();
dataTx.close();
service.shutdown();
......@@ -127,17 +127,17 @@ public class WriteByMetabolitesID {
// }
// }
Node node = service.getNodeByEntryAndLabel("cpd15496", MetaboliteMajorLabel.ModelSeed);
Node node = service.getNodeByEntryAndLabel("META:Glucuronides", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties());
Iterable<Relationship> rels = node.getRelationships();
for(Relationship rel : rels) {
Long otherNode = rel.getOtherNodeId(node.getId());
System.out.println(service.getNodeById(otherNode).getAllProperties());
}
// Iterable<Relationship> rels = node.getRelationships();
//
// for(Relationship rel : rels) {
// Long otherNode = rel.getOtherNodeId(node.getId());
//
// System.out.println(service.getNodeById(otherNode).getAllProperties());
// }
......@@ -155,47 +155,49 @@ public class WriteByMetabolitesID {
Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) {
try {
Boolean generate = true;
String accession = "P77211";
Boolean generate = false;
String accession = "P16433";
// test2(service, null, null);
Retriever.runRetriever(true, true, accession);
if(generate) {
reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
String[] accessions = new String[] {accession};
reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
for(String acc : accessions)
reactionsData2.put(acc, reactionsData.get(acc));
Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
String[] accessions = new String[] {accession};
/////TRANSYT
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(reactionsData2, service, data, namesAndIDsContainer, properties).getResults(); //uncomment
for(String acc : accessions)
reactionsData2.put(acc, reactionsData.get(acc));
/////TRANSYT
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(reactionsData2, service, data, namesAndIDsContainer, properties).getResults(); //uncomment
for(TcNumberContainer2 container : newData.get(accession)) {
System.out.println(container.getTcNumber());
for(TcNumberContainer2 container : newData.get(accession)) {
for( Integer id : container.getAllReactionsIds()) {
System.out.println();
System.out.println(container.getTcNumber());
System.out.println(container.getReactionContainer(id).getReactionID());
System.out.println(container.getReactionContainer(id).getMetaReactionID());
System.out.println(container.getReactionContainer(id).getCompartmentalizedReactionID());
System.out.println(container.getReactionContainer(id).getReaction());
System.out.println(container.getReactionContainer(id).getReactionBase());
System.out.println(container.getReactionContainer(id).getReactionKEGG());
System.out.println(container.getReactionContainer(id).getReactionBiGG());
System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
System.out.println(container.getReactionContainer(id).getReactionModelSEED());
System.out.println(container.getReactionContainer(id).getOriginalReaction());
}
for( Integer id : container.getAllReactionsIds()) {
System.out.println();
System.out.println(container.getReactionContainer(id).getReactionID());
System.out.println(container.getReactionContainer(id).getMetaReactionID());
System.out.println(container.getReactionContainer(id).getCompartmentalizedReactionID());
System.out.println(container.getReactionContainer(id).getReaction());
System.out.println(container.getReactionContainer(id).getReactionBase());
System.out.println(container.getReactionContainer(id).getReactionKEGG());
System.out.println(container.getReactionContainer(id).getReactionBiGG());
System.out.println(container.getReactionContainer(id).getReactionMetaCyc());
System.out.println(container.getReactionContainer(id).getReactionModelSEED());
System.out.println(container.getReactionContainer(id).getOriginalReaction());
}
System.out.println();
}
System.out.println();
}
}
// JSONFilesUtils.writeJSONTriageReactions(newData);
......@@ -236,13 +238,16 @@ public class WriteByMetabolitesID {
String entryID = node.getEntry();
if(!entryID.isEmpty()) {
Map<String, Object> nodeProperties = node.getAllProperties();
BiosynthMetaboliteProperties bioSynMetab;
Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service);
if(entryID.matches("META:.*")) {
synonyms.add(entryID);
}
// String names ="";
//
// if(node.hasProperty("name")) {
......@@ -266,14 +271,22 @@ public class WriteByMetabolitesID {
bioSynMetab = new BiosynthMetaboliteProperties(entryID, nodeProperties.get("major_label").toString(), synonyms);
else
bioSynMetab = new BiosynthMetaboliteProperties(entryID, BiosynthMetaboliteProperties.NONE, synonyms);
if(node.hasProperty("formula"))
bioSynMetab.setFormula((String) nodeProperties.get("formula"));
if(node.hasProperty("formula")) {
String formula = (String) nodeProperties.get("formula");
if(formula != null && !(formula.equalsIgnoreCase("none") || formula.equalsIgnoreCase("null")))
bioSynMetab.setFormula(formula);
}
if(entryID.equalsIgnoreCase("cpd03805"))
bioSynMetab.setFormula("C6H11O8P"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd15391"))
bioSynMetab.setFormula("C11H17NO11P"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd26871"))
bioSynMetab.setFormula("C4H6N2O3R2"); //there's an error in the database, it needs to be updated
else if(entryID.equalsIgnoreCase("cpd28237"))
bioSynMetab.setFormula("C6H8N3O4R3"); //there's an error in the database, it needs to be updated
if(node.hasProperty("remark"))
bioSynMetab.setRemark((String) nodeProperties.get("remark"));
......
......@@ -10,6 +10,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.transyt.service.containers.BiosynthMetabolites;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.BiosynthMetaboliteProperties;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcNumberContainer2;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcdbMetabolitesContainer;
......@@ -30,17 +31,20 @@ public class IdentifyReactionsMetabolites {
private Map<String, Set<String>> metabolites;
private Map<String, String[]> forChildsSearch;
private Map<String, String> metabolitesByOriginalName = new HashMap<String, String>();
private Map<String, BiosynthMetaboliteProperties> allData;
private Synonyms dictionary;
private static final Logger logger = LoggerFactory.getLogger(IdentifyReactionsMetabolites.class);
public IdentifyReactionsMetabolites(Map<String, Set<TcNumberContainer2>> reactionsData, BiosynthMetabolites namesAndIDsContainer, BiodbGraphDatabaseService service) {
public IdentifyReactionsMetabolites(Map<String, Set<TcNumberContainer2>> reactionsData, BiosynthMetabolites namesAndIDsContainer,
Map<String, BiosynthMetaboliteProperties> allData, BiodbGraphDatabaseService service) {
this.dictionary = new Synonyms();
this.allData = allData;
metabolites = getMetabolitesFromReactions(reactionsData);
for(String metabolite : metabolites.keySet()) {
for(String originalMetabolite : metabolites.get(metabolite)) {
this.metabolitesByOriginalName.put(originalMetabolite, metabolite);
......@@ -70,44 +74,50 @@ public class IdentifyReactionsMetabolites {
// Set<String> forSearch = new HashSet<>(metabolites);
// metabolites = new HashSet<>(standardizationOfNames1(metabolitesFromFile, dictionary));
this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer);
this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs());
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
//
// System.out.println("metabolites >>>" + metabolites.size()); //2078
//
// System.out.println("allmetabolites >>>" + namesAndIDsContainer.getMetabolitesIDs().size()); //154224
boolean print = false;
//
identificationByDirectMatch();
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
if(print)
System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry();
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
if(print)
System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
identificationInLowerCase();
// System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089
if(print)
System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089
if(print)
identificationReplacingNonAlphanumeric();
identificationReplacingNonAlphanumeric();
// System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
if(print)
System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
identificationIntroducingDandL();
// System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103
if(print)
System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103
identificationReplacingNonAlphanumericAndInLowercase();
// System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
if(print)
System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
// System.out.println(metabolites);
......@@ -125,18 +135,22 @@ public class IdentifyReactionsMetabolites {
if(id != null) {
// System.out.println(id + "\t" + ids.get(id));
Map<String, MetaboliteMajorLabel> map = new HashMap<>();
if(id.equals(MetaboliteMajorLabel.EcoCyc))
map.put(ids.get(id), MetaboliteMajorLabel.MetaCyc);
else
map.put(ids.get(id), id);
// if(metabolite.equalsIgnoreCase("quinol") || metabolite.equalsIgnoreCase("quinone"))
// System.out.println();
// if(allData.containsKey(ids.get(id)) && allData.get(ids.get(id)).getFormula() != null
// && !allData.get(ids.get(id)).getFormula().equalsIgnoreCase("NONE")) {
tcdbMetabolitesIDs.put(metabolite, map);
metabolites.remove(metabolite);
tcdbMetabolitesIDs.put(metabolite, map);
metabolites.remove(metabolite);
// }
}
}
......@@ -355,9 +369,9 @@ public class IdentifyReactionsMetabolites {
private void identificationByDirectMatch() {
for(String metabolite : new HashSet<>(metabolites.keySet())) {
if(metabolite.equals("light"))
System.out.println();
// if(metabolite.equals("light"))
// System.out.println();
try {
if(metabolite.equalsIgnoreCase("galactonate"))
......@@ -422,7 +436,7 @@ public class IdentifyReactionsMetabolites {
String[] reactions = new String[] {reactionContainer.getReaction(),
reactionContainer.getOriginalReaction()};
for(String reaction : reactions) {
for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction,
dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) {
......@@ -455,7 +469,7 @@ public class IdentifyReactionsMetabolites {
String[] metabs = reaction.split(" \\+ ");
for(String metab : metabs) {
if(!metab.matches("^(\\d+-).+")) {
metab = metab.replaceAll("^(\\d+)", "");
}
......@@ -563,7 +577,7 @@ public class IdentifyReactionsMetabolites {
}
String originalWord = word;
if(word.matches("ic acids*"))
word = word.replaceAll("ic acids*", "ate");
......
......@@ -56,7 +56,7 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class ProvideTransportReactionsToGenes {
private static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber";
public static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber";
private Map<String, List<AlignmentCapsule>> blastResults;
private RestNeo4jGraphDatabase service;
......@@ -543,23 +543,16 @@ public class ProvideTransportReactionsToGenes {
private void generateFinalResultsAuxValidation(Map<String, Set<String>> similaritiesResults,
Map<String, GeneContainer> genesContainers) {
// Map<String, Map<String, Set<String>>> finalResultsAux = new HashMap<>();
finalResults = new HashMap<>();
// resultsByEvalue = new HashMap<String, Set<String>>(); //DELETE ME!!!!!!!!!!!!!!1
// Map<String, String> locus =
// FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt"));
for(String queryAccession : resultsByEvalue.keySet()) {
if(queryAccession.equals("b0341"))
System.out.println();
for(String queryAccession : blastResults.keySet()) {
Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>();
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
Map<String, Set<String>> res = new HashMap<>();
boolean save = false;
......@@ -570,50 +563,54 @@ public class ProvideTransportReactionsToGenes {
// for(String tc : reactionsByTcNumberForAnnotation.keySet())
// System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc));
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
if(reactionsByTcNumberForAnnotation.containsKey(tcNumber) && tcNumber.contains(tcFamily)){
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
Set<String> reactions = reactionsByTcNumberForAnnotation.get(tcNumber);
if(reactionsByTcNumberForAnnotation.containsKey(tcNumber) && tcNumber.contains(tcFamily)){
// if(!reactions.isEmpty())
// System.out.println(reactions);
Set<String> reactions = reactionsByTcNumberForAnnotation.get(tcNumber);
// System.out.println("dddd " + reactions);
// if(!reactions.isEmpty())
// System.out.println(reactions);
Iterator<String> iterator = reactions.iterator();
// System.out.println("dddd " + reactions);
while (iterator.hasNext()) {
Iterator<String> iterator = reactions.iterator();
String id = iterator.next();
while (iterator.hasNext()) {
if(!reactionsAlreadyAssigned.contains(id) || accepted.contains(id)) {
String id = iterator.next();
reactionsAlreadyAssigned.add(id);
accepted.add(id);
if(!reactionsAlreadyAssigned.contains(id) || accepted.contains(id)) {
if(!res.containsKey(tcNumber))
res.put(tcNumber, new HashSet<String>());
reactionsAlreadyAssigned.add(id);
accepted.add(id);
Set<String> set = res.get(tcNumber);
set.add(id);
if(!res.containsKey(tcNumber))
res.put(tcNumber, new HashSet<String>());
Set<String> set = res.get(tcNumber);
set.add(id);
res.put(tcNumber, set);
res.put(tcNumber, set);
save = true;
save = true;
}
}
}
}
}
}
if(res.size() > 0) {
if(res.size() > 0) {
reportByEvalueAux.put(queryAccession, new HashMap<>(res));
reportByEvalueAux.put(queryAccession, new HashMap<>(res));
}
}
if(similaritiesResults.containsKey(queryAccession) && !similaritiesResults.get(queryAccession).isEmpty()) {
Set<String> toAdd = new HashSet<>();
......@@ -1609,10 +1606,10 @@ public class ProvideTransportReactionsToGenes {
metabolitesNames.putAll(service.getTemporaryCompoundsNames());
metabolitesFormulas.putAll(service.getTemporaryCompoundsFormulas());
// for(String name : metabolitesNames.values())
// if(GENERATIONS_EXCEPTION_FILE.containsKey(name)) {
// return null;
// }
// for(String name : metabolitesNames.values())
// if(GENERATIONS_EXCEPTION_FILE.containsKey(name)) {
// return null;
// }
// System.out.println(reactionWithIDs);
// System.out.println(reactionAux);
......
......@@ -15,6 +15,7 @@ import pt.uminho.ceb.biosystems.merlin.utilities.Pair;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.transyt.service.internalDB.WriteByMetabolitesID;
import pt.uminho.ceb.biosystems.transyt.service.neo4jRest.RestNeo4jGraphDatabase;
import pt.uminho.ceb.biosystems.transyt.service.reactions.ProvideTransportReactionsToGenes;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.GeneContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
......@@ -455,6 +456,8 @@ public class GPRAssociations {
if(!geneRule.contains(g) && complexesTCDB.get(tcNumber) != null && complexesTCDB.get(tcNumber).size() < 2) //avoids creating incomplete GPRs
geneRule = geneRule.concat(" or ").concat(g);
else if(tcNumber.equals(ProvideTransportReactionsToGenes.NO_TCNUMBER_ASSOCIATED))
geneRule = geneRule.concat(" or ").concat(g);
}
geneRule = geneRule.replaceAll("^\\sor\\s", "");
......
......@@ -41,7 +41,7 @@ public class MetabolitesChilds {
LinkedList<Long> toVisit = new LinkedList<>();
Node node = service.getNodeById(id);
// relation = node.getRelationships(Direction.INCOMING);
// for(Relationship rel : relation) {
......@@ -79,20 +79,28 @@ public class MetabolitesChilds {
Node[] nodes = rel.getNodes();
if(nodes[1].getId() == currentNodeID && rel.getType().toString().equals(MetaboliteRelationshipType.instance_of.toString())) {
try {
if(nodes[1].getId() == currentNodeID && rel.getType().toString().equals(MetaboliteRelationshipType.instance_of.toString())) {
Integer currentGeneration = generation.get(currentNodeID);
if(currentGeneration < generationLimit || generationLimit == ALL_CODE) {
Integer currentGeneration = generation.get(currentNodeID);
if(!visited.contains(nodes[0].getId())
&& !nodes[0].getProperty("entry").toString().matches("(?i).*modified.*")) {
if(currentGeneration < generationLimit || generationLimit == ALL_CODE) {
if(!nodes[0].hasProperty("name")
|| (nodes[0].hasProperty("name") && !nodes[0].getProperty("name").toString().matches("(?i).*modified.*"))) {
if(!visited.contains(nodes[0].getId())
&& !node.getProperty("entry").toString().matches("(?i).*modified.*")
&& !node.getProperty("name").toString().matches("(?i).*modified.*")) {
toVisit.add(nodes[0].getId());
generation.put(nodes[0].getId(), currentGeneration + 1);
toVisit.add(nodes[0].getId());
generation.put(nodes[0].getId(), currentGeneration + 1);
}
}
}
}
} catch (Exception e) {
logger.error(e.getMessage());
// e.printStackTrace();
}
}
......@@ -100,11 +108,11 @@ public class MetabolitesChilds {
}
if(missingChilds != null && metabolite != null && missingChilds.containsKey(metabolite)) {
for(String child : missingChilds.get(metabolite)) {
Long missingId = identifyNode("META:"+child, MetaboliteMajorLabel.MetaCyc, service);
if(missingId != null)