GitLab CI for Docker build is enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit ec1c4d65 authored by Davide Lagoa
Browse files

minor changes

parent 17f64607
......@@ -127,11 +127,11 @@ public class WriteByMetabolitesID {
// }
// }
Node node = service.getNodeByEntryAndLabel("META:Ions", MetaboliteMajorLabel.MetaCyc);
Node node = service.getNodeByEntryAndLabel("META:Amino-Acids-20", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties());
// Node node = service.getNodeByEntryAndLabel("cpd15460", MetaboliteMajorLabel.ModelSeed);
// Node node = service.getNodeByEntryAndLabel("cpd11420", MetaboliteMajorLabel.ModelSeed);
//
// System.out.println(node.getAllProperties());
......@@ -161,7 +161,7 @@ public class WriteByMetabolitesID {
try {
Boolean generate = true;
String accession = "Q8DL32"; //
String accession = "Q9X0S4"; //
// test2(service, null, null);
......
......@@ -255,9 +255,6 @@ public class Reports {
reportPath = reportPath.concat("scoresMethod1.txt");
System.out.println(reportByEvalue.containsKey("b3201"));
System.out.println(reportByEvalueAux.containsKey("b3201"));
try {
PrintWriter writer = new PrintWriter(reportPath, "UTF-8");
......
......@@ -56,6 +56,8 @@ public class IdentifyReactionsMetabolites {
logger.info("Total found: {}", tcdbMetabolitesIDs.size());
System.out.println(tcdbMetabolitesIDs);
}
/**
......@@ -718,6 +720,9 @@ public class IdentifyReactionsMetabolites {
entry[0] = ids.get(MetaboliteMajorLabel.MetaCyc);
entry[1] = MetaboliteMajorLabel.MetaCyc.toString();
if(metabolite.equals("META:Amino-Acids-20"))
entry[0] = metabolite;
forChildsSearch.put(metabolite, entry);
}
else if(ids.containsKey(MetaboliteMajorLabel.EcoCyc)) { //ecocyc after metacyc
......@@ -727,6 +732,9 @@ public class IdentifyReactionsMetabolites {
entry[0] = ids.get(MetaboliteMajorLabel.MetaCyc);
entry[1] = MetaboliteMajorLabel.MetaCyc.toString();
if(metabolite.equals("META:Amino-Acids-20"))
entry[0] = metabolite;
forChildsSearch.put(metabolite, entry);
}
......
......@@ -20,6 +20,8 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Node;
import pt.uminho.ceb.biosystems.merlin.transporters.core.utils.Enumerators.TransportType;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.merlin.utilities.io.FileUtils;
......@@ -31,6 +33,8 @@ import pt.uminho.ceb.biosystems.mew.biocomponents.container.components.ReactionT
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.exceptions.ReactionAlreadyExistsException;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLLevel3Reader;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLReader;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.reactionsGenerator.GenerateTransportReactions;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.utilities.ProcessTcdbMetabolites;
import pt.uminho.ceb.biosystems.transyt.service.blast.Blast;
import pt.uminho.ceb.biosystems.transyt.service.internalDB.WriteByMetabolitesID;
import pt.uminho.ceb.biosystems.transyt.service.kbase.ModelSEEDRealatedOperations;
......@@ -50,10 +54,12 @@ import pt.uminho.ceb.biosystems.transyt.utilities.capsules.GeneContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.Organism;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.Subunits;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcdbMetabolitesContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.Enumerators;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.Enumerators.TypeOfTransporter;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.MetaboliteReferenceDatabaseEnum;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.JSONFilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.WriteExcel;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
......@@ -102,7 +108,7 @@ public class ProvideTransportReactionsToGenes {
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
public ProvideTransportReactionsToGenes(String workFolderID, Organism organismProperties, String queryPath, String modelPath,
String metabolitesPath, String resultPath, Properties properties) {
String metabolitesPath, String resultPath, Properties properties, boolean searchReactions) {
logger.info("TranSyT initialized!");
......@@ -127,46 +133,32 @@ public class ProvideTransportReactionsToGenes {
this.subunits = new Subunits();
setDefaultRelationshipsToSearch();
// findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(workFolderID, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
logger.info("Beginning transaction with neo4j TranSyT database...");
service = TransytNeo4jInitializer.getDatabaseService(properties);
// service = new TransytGraphDatabaseService(graphDatabaseService);
logger.info("Beginning transaction with neo4j TranSyT database...");
resultPath = resultPath.concat("results/");
// Transaction dataTx = graphDatabaseService.beginTx();
//////////////////////////
// @SuppressWarnings("resource")
// Scanner reader = new Scanner(System.in);
//
// int n = 1;
//
//
// while (n != 99) {
File resultsFile = new File(resultPath);
if(!resultsFile.exists())
resultsFile.mkdirs();
start(metabolitesPath, queryPath, resultPath);
// resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
// System.out.println("Insert a number to repeat or 99 to finish");
//
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
/////////////////////////
// dataTx.close();
// service.shutdown();
// graphDatabaseService.shutdown();
Map<String, GeneContainer> genesContainers = buildGenesContainers();
if(searchReactions)
annotateReactionsToGenes(resultPath, metabolitesPath, genesContainers);
else
annotateTransportProteinsFunction(resultPath, genesContainers);
service.close();
......@@ -180,21 +172,30 @@ public class ProvideTransportReactionsToGenes {
}
}
private void start(String metabolitesPath,String queryPath, String resultPath) {
private void start(String metabolitesPath,String queryPath, String resultPath, boolean searchReactions) {
try {
// String path = new File(queryPath).getParent().concat("/");
resultPath = resultPath.concat("results/");
File resultsFile = new File(resultPath);
if(!resultsFile.exists())
resultsFile.mkdirs();
}
catch (Exception e) {
e.printStackTrace();
}
}
/**
* Method to annotate the model's reactions to the genes.
*
* @param resultPath
* @param metabolitesPath
* @param genesContainers
* @throws Exception
*/
private void annotateReactionsToGenes(String resultPath, String metabolitesPath, Map<String, GeneContainer> genesContainers) throws Exception {
reactionsToIgnore = new HashSet<>();
// tests();
this.modelMetabolites = Tools.readModelMetabolitesFromSBML(modelPath); //Change this to read directly from the container
......@@ -213,29 +214,14 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Searching reactions...");
// resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
Set<String> tcNumbers = identifyTcNumbersForSearch();
// this.subunits.setSubunitsDatabase(findSubunitsInDatabase(tcNumbers));
logger.info("Searching reactions in {} tcNumbers", tcNumbers.size());
reactionsByTcNumber = getReactionsByTcNumber(tcNumbers);
// System.out.println(reactionsByTcNumber);
if(reactionContainersByID.keySet().size() == 0 && modelMetabolites != null)
logger.warn("No metabolites present in the model are available in the selected reactions!");
Map<String, GeneContainer> genesContainers = buildGenesContainers();
// System.out.println("Main Reactions -> " + mainReactions.size());
//
// for(String key : mainReactions.keySet())
// System.out.println(key + "\t" + mainReactions.get(key));
Map<String, Set<String>> similaritiesResults = new HashMap<>(); //the results of this map will be sorted by similarity
if(!properties.isIgnoreMethod2())
......@@ -273,7 +259,8 @@ public class ProvideTransportReactionsToGenes {
// System.out.println("here");
OutputTransytFormat output = new OutputTransytFormat(finalResults, metabolitesFormulas, reactionContainersByID, geneRules, metabolitesNames);
OutputTransytFormat output = new OutputTransytFormat(resultPath + "/reactions_references.txt", finalResults, metabolitesFormulas,
reactionContainersByID, geneRules, metabolitesNames);
Container container = new Container(output);
......@@ -312,18 +299,55 @@ public class ProvideTransportReactionsToGenes {
FileUtils.createZipFile(fileResults.getParent(), fileResults.getParentFile().getParent().concat("/results.zip"), 5);
logger.info("zip file created!");
//
}
//
// validation(container, path);
/**
* Method that only tries to annotate the genes with a TC number and a short description, ignoring reaction associations.
*
* @param resultPath
* @param genesContainers
* @throws Exception
*/
private void annotateTransportProteinsFunction(String resultPath, Map<String, GeneContainer> genesContainers) throws Exception {
List<String[]> table = JSONFilesUtils.readTCDBScrapedInfo();
// constructSBMLKBase(container, path);
Map<String, String> descriptions = ProcessTcdbMetabolites.getTCDescriptions(table);
Map<String, String> annotationsByGene = new HashMap<>();
for(String queryAccession : this.resultsByEvalue.keySet()) {
String annotation = null;
if(genesContainers.get(queryAccession).getAnnotatedFamily() != null) {
if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
if(tcNumber.contains(tcFamily)){
String shortDescription = descriptions.get(tcNumber).split("\\.")[0];
if(annotation != null)
annotation = annotation + "### ";
else
annotation = "";
annotation = annotation + tcNumber + " - " + shortDescription;
}
}
catch (Exception e) {
e.printStackTrace();
}
}
if(annotation != null)
annotationsByGene.put(queryAccession, annotation);
}
FilesUtils.saveMapInFile(resultPath + "transport_genes_annotation.txt", annotationsByGene);
}
private void countReactionsByGene(Map<String, String> geneRules, Map<String, ReactionCI> sbmlReactions) {
......@@ -452,35 +476,6 @@ public class ProvideTransportReactionsToGenes {
}
}
// /**
// * Merge the results of both methods
// *
// * @param similaritiesResults
// */
// private void generateFinalResults(Map<String, Map<String, Set<String>>> similaritiesResults) {
//
// finalResults = new HashMap<>(similaritiesResults);
//
// for(String queryAccession : resultsByEvalue.keySet()) {
//
// Map<String, Set<String>> res = new HashMap<>();
//
// for(String tcNumber : resultsByEvalue.get(queryAccession)) {
//
// if(reactionsByTcNumberForAnnotation.containsKey(tcNumber))
// res.put(tcNumber, reactionsByTcNumberForAnnotation.get(tcNumber));
//
// // if(reactionsByTcNumber.containsKey(tcNumber))
// // res.put(tcNumber, reactionsByTcNumber.get(tcNumber));
//
// }
//
//
//
// finalResults.put(queryAccession, res);
// }
// }
private Map<String, Set<String>> getReactionsForGenesByEvalue() {
Map<String, Set<String>> results = new LinkedHashMap<>();
......@@ -798,7 +793,7 @@ public class ProvideTransportReactionsToGenes {
private void generateFinalResultsAuxValidation_filter_reactions(Map<String, Set<String>> similaritiesResults,
Map<String, GeneContainer> genesContainers) {
// Map<String, String> similaritiesReport = new HashMap<>();
// Map<String, String> similaritiesReport = new HashMap<>();
Set<String> compounds = new HashSet<>();
......@@ -809,7 +804,7 @@ public class ProvideTransportReactionsToGenes {
for(String queryAccession : blastResults.keySet()) {
// if(queryAccession.equals("b3201"))
// if(queryAccession.equals("b4321"))
// System.out.println();
Set<String> reactionsAlreadyAssigned = new HashSet<>();
......@@ -908,7 +903,7 @@ public class ProvideTransportReactionsToGenes {
String id = iterator.next();
if(!reactionsAlreadyAssigned.contains(id) &&
if(this.reactionContainersByID.containsKey(id) && !reactionsAlreadyAssigned.contains(id) &&
(transportType == null || this.reactionContainersByID.get(id).getTransportType().equals(transportType))) {
for(String compound : this.reactionContainersByID.get(id).getMetabolites()) {
......@@ -924,12 +919,11 @@ public class ProvideTransportReactionsToGenes {
break;
}
}
}
}
// if(toAdd.size() > 0)
// similaritiesReport.put(queryAccession.split("\\s+")[0], toAdd.toString());
// if(toAdd.size() > 0)
// similaritiesReport.put(queryAccession.split("\\s+")[0], toAdd.toString());
if(!toAdd.isEmpty())
res.put(NO_TCNUMBER_ASSOCIATED, toAdd);
......@@ -954,8 +948,6 @@ public class ProvideTransportReactionsToGenes {
finalResults.put(name, res);
}
}
// System.exit(0);
}
/**
......@@ -1476,6 +1468,7 @@ public class ProvideTransportReactionsToGenes {
Set<TransytNode> reactionNodes = new HashSet<>();
Set<String> ids = new HashSet<>();
Set<String> confidenceLeves = new HashSet<>();
if(tcHasReaction.containsKey(tcNumber)) {
......@@ -1489,6 +1482,9 @@ public class ProvideTransportReactionsToGenes {
boolean rev = Boolean.valueOf(rel.getProperty(TransytGeneralProperties.Reversible));
if(tcNumber.startsWith("9.")) //just to not waste time, change in the future as needed
confidenceLeves.add(rel.getProperty(TransytGeneralProperties.Confidence_Level));
String reactionID = node.getProperty(TransytGeneralProperties.ReactionID);
reversibility.put(reactionID, rev);
......@@ -1500,10 +1496,32 @@ public class ProvideTransportReactionsToGenes {
}
containers.put(tcNumber, ids);
// if(tcNumber.equals("2.A.86.1.4"))
// System.out.println();
// if(tcNumber.equals("2.A.86.1.4"))
// System.out.println();
Set<String> reactionsToSave = null;
boolean search = false;
Set<String> reactionsToSave = getAllNodesReactionsIDsForAnnotation(reactionNodes, reversibility,
if(tcNumber.startsWith("9.")) {
if(properties.isAcceptUnknownFamily()) {
search = true;
}
else {
for(String confidence : confidenceLeves) { //cycle because not sure if there are several sub-levels inside this level
if(confidence.startsWith(GenerateTransportReactions.METACYC_CONFIDENCE_LEVEL)) {
search = true;
break;
}
}
}
}
else {
search = true;
}
if(search)
reactionsToSave = getAllNodesReactionsIDsForAnnotation(reactionNodes, reversibility,
reactionHasReactant, reactionHasProduct, transportTypes);
if(reactionsToSave != null)
......@@ -1571,104 +1589,6 @@ public class ProvideTransportReactionsToGenes {
return toSearchTcNumber;
}
// /**
// *
// */
// private void getTaxonomy(String uniprotAccession, TransytNode node){
//
// if(node != null) {
//
// // String accession = node.getP7roperty(TriageGeneralProperties.Accession_Number.toString()).toString();
//
// // System.out.println(node.getAllProperties());
//
// String[] tax = node.getProperty(TransytGeneralProperties.Taxonomy).replaceAll("\\[", "").replaceAll("\\]", "").split(", ");
//
// taxonomies.put(uniprotAccession, tax);
//
// organisms.put(uniprotAccession, node.getProperty(TransytGeneralProperties.Organism));
//
// }
//
// // System.out.println(taxonomies.size());
//
// // for(String tax : taxonomies.keySet())
// // Utilities.printArray(taxonomies.get(tax));
// }
// /**
// * Method to get all reaction Containers for all pt.uminho.ceb.biosystems.transyt.scraper.blast results.
// *
// * @return
// */
// private void buildReactionContainersForBlastResults(){
//
// mainReactions = new HashMap<>();
//
// // Map<String, List<AlignmentCapsule>> blastResults2 = new HashMap<>();
//
// // blastResults2.put("YP_003536402.1", blastResults.get("YP_003536402.1"));
//
// for(String queryAccession : blastResults.keySet()) {
//
// try {
// Set<String> homologous = new HashSet<>();
//
// Map<String, Set<ReactionContainer>> containers = new HashMap<>();
//
// for(AlignmentCapsule capsule : blastResults.get(queryAccession)) {
//
// homologous.add(capsule.getTarget());
//
// String tcNumber = capsule.getTcdbID();
//
// if(!tcNumbersNotPresentInTRIAGEdatabase.contains(tcNumber)) {
//
// Node tcNumberNode = service.findTcNumberNode(tcNumber);
//
// if(tcNumberNode != null) {
//
// if(!mainReactions.containsKey(tcNumber)) {
//
// String ids = (String) tcNumberNode.getProperty(TriageGeneralProperties.MainReactionsIDs.toString());
//
// mainReactions.put(tcNumber, Arrays.asList(ids.split(", ")));
// }
//
// Set<Node> reactionNodes = new HashSet<>();
//
// Iterable<Relationship> relations = tcNumberNode.getRelationships(TriageRelationshipType.has_reaction);
//
// for(Relationship rel : relations) {
//
// reactionNodes.add(rel.getEndNode());
//
// }
//
// containers.put(tcNumber, getAllNodesAsReactionContainers(tcNumber, reactionNodes));
// }
// else {
// tcNumbersNotPresentInTRIAGEdatabase.add(tcNumber);
//
// logger.warn("No results found in TRIAGE neo4j database for tcNumber {}! Please consider update the database.", tcNumber);
// }
// }
// }
//
// reactionContainers.put(queryAccession, containers);
//
// homologousGenes.put(queryAccession, homologous);
// }
// catch (Exception e) {
//
// logger.error("An error occurred while retrieving the pt.uminho.ceb.biosystems.transyt.service.reactions for UniProt Accession: ", queryAccession);
//
// e.printStackTrace();
// }
//
// }
// }
/**
* Build containers and count common taxa.
*
......@@ -1690,8 +1610,8 @@ public class ProvideTransportReactionsToGenes {
for(String queryAccession : blastResults.keySet()) {
// if(queryAccession.equals("b2416"))
// System.out.println();
// if(queryAccession.equals("b2416"))
// System.out.println();
Set<String> homologousGenes = new HashSet<>();
Map<String, Double> similarities = new HashMap<>();
......@@ -1807,69 +1727,6 @@ public class ProvideTransportReactionsToGenes {
return genes;
}
// /**
// * Get the transport type for a given accession.
// *
// * @param queryAccession
// * @return
// */
// private TypeOfTransporter getTransportType(String queryAccession) {
//
// try {
// for(ReactionContainer container : reactionContainers.get(queryAccession)) {
//