GitLab CI for Docker builds is enabled! You can use it by adding a .gitlab-ci.yml file to your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit ec1c4d65 authored by Davide Lagoa
Browse files

minor changes

parent 17f64607
......@@ -127,11 +127,11 @@ public class WriteByMetabolitesID {
// }
// }
Node node = service.getNodeByEntryAndLabel("META:Ions", MetaboliteMajorLabel.MetaCyc);
Node node = service.getNodeByEntryAndLabel("META:Amino-Acids-20", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties());
// Node node = service.getNodeByEntryAndLabel("cpd15460", MetaboliteMajorLabel.ModelSeed);
// Node node = service.getNodeByEntryAndLabel("cpd11420", MetaboliteMajorLabel.ModelSeed);
//
// System.out.println(node.getAllProperties());
......@@ -161,7 +161,7 @@ public class WriteByMetabolitesID {
try {
Boolean generate = true;
String accession = "Q8DL32"; //
String accession = "Q9X0S4"; //
// test2(service, null, null);
......
......@@ -255,9 +255,6 @@ public class Reports {
reportPath = reportPath.concat("scoresMethod1.txt");
System.out.println(reportByEvalue.containsKey("b3201"));
System.out.println(reportByEvalueAux.containsKey("b3201"));
try {
PrintWriter writer = new PrintWriter(reportPath, "UTF-8");
......
......@@ -56,6 +56,8 @@ public class IdentifyReactionsMetabolites {
logger.info("Total found: {}", tcdbMetabolitesIDs.size());
System.out.println(tcdbMetabolitesIDs);
}
/**
......@@ -717,8 +719,11 @@ public class IdentifyReactionsMetabolites {
entry[0] = ids.get(MetaboliteMajorLabel.MetaCyc);
entry[1] = MetaboliteMajorLabel.MetaCyc.toString();
if(metabolite.equals("META:Amino-Acids-20"))
entry[0] = metabolite;
forChildsSearch.put(metabolite, entry);
forChildsSearch.put(metabolite, entry);
}
else if(ids.containsKey(MetaboliteMajorLabel.EcoCyc)) { //ecocyc after metacyc
......@@ -726,6 +731,9 @@ public class IdentifyReactionsMetabolites {
entry[0] = ids.get(MetaboliteMajorLabel.MetaCyc);
entry[1] = MetaboliteMajorLabel.MetaCyc.toString();
if(metabolite.equals("META:Amino-Acids-20"))
entry[0] = metabolite;
forChildsSearch.put(metabolite, entry);
}
......
......@@ -20,6 +20,8 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gargoylesoftware.htmlunit.javascript.host.dom.Node;
import pt.uminho.ceb.biosystems.merlin.transporters.core.utils.Enumerators.TransportType;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.merlin.utilities.io.FileUtils;
......@@ -31,6 +33,8 @@ import pt.uminho.ceb.biosystems.mew.biocomponents.container.components.ReactionT
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.exceptions.ReactionAlreadyExistsException;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLLevel3Reader;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLReader;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.reactionsGenerator.GenerateTransportReactions;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.utilities.ProcessTcdbMetabolites;
import pt.uminho.ceb.biosystems.transyt.service.blast.Blast;
import pt.uminho.ceb.biosystems.transyt.service.internalDB.WriteByMetabolitesID;
import pt.uminho.ceb.biosystems.transyt.service.kbase.ModelSEEDRealatedOperations;
......@@ -50,10 +54,12 @@ import pt.uminho.ceb.biosystems.transyt.utilities.capsules.GeneContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.Organism;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.Subunits;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.TcdbMetabolitesContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.Enumerators;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.Enumerators.TypeOfTransporter;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.MetaboliteReferenceDatabaseEnum;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.JSONFilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.WriteExcel;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
......@@ -102,7 +108,7 @@ public class ProvideTransportReactionsToGenes {
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
public ProvideTransportReactionsToGenes(String workFolderID, Organism organismProperties, String queryPath, String modelPath,
String metabolitesPath, String resultPath, Properties properties) {
String metabolitesPath, String resultPath, Properties properties, boolean searchReactions) {
logger.info("TranSyT initialized!");
......@@ -127,46 +133,32 @@ public class ProvideTransportReactionsToGenes {
this.subunits = new Subunits();
setDefaultRelationshipsToSearch();
// findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(workFolderID, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
logger.info("Beginning transaction with neo4j TranSyT database...");
service = TransytNeo4jInitializer.getDatabaseService(properties);
// service = new TransytGraphDatabaseService(graphDatabaseService);
logger.info("Beginning transaction with neo4j TranSyT database...");
resultPath = resultPath.concat("results/");
// Transaction dataTx = graphDatabaseService.beginTx();
//////////////////////////
// @SuppressWarnings("resource")
// Scanner reader = new Scanner(System.in);
//
// int n = 1;
//
//
// while (n != 99) {
File resultsFile = new File(resultPath);
if(!resultsFile.exists())
resultsFile.mkdirs();
start(metabolitesPath, queryPath, resultPath);
// resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
// System.out.println("Insert a number to repeat or 99 to finish");
//
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
/////////////////////////
// dataTx.close();
// service.shutdown();
// graphDatabaseService.shutdown();
Map<String, GeneContainer> genesContainers = buildGenesContainers();
if(searchReactions)
annotateReactionsToGenes(resultPath, metabolitesPath, genesContainers);
else
annotateTransportProteinsFunction(resultPath, genesContainers);
service.close();
......@@ -180,150 +172,182 @@ public class ProvideTransportReactionsToGenes {
}
}
private void start(String metabolitesPath,String queryPath, String resultPath) {
private void start(String metabolitesPath,String queryPath, String resultPath, boolean searchReactions) {
try {
// String path = new File(queryPath).getParent().concat("/");
resultPath = resultPath.concat("results/");
File resultsFile = new File(resultPath);
if(!resultsFile.exists())
resultsFile.mkdirs();
}
catch (Exception e) {
e.printStackTrace();
}
}
reactionsToIgnore = new HashSet<>();
// tests();
/**
* Models' reactions annotation method
*
* @param resultPath
* @param genesContainers
* @throws Exception
*/
private void annotateReactionsToGenes(String resultPath, String metabolitesPath, Map<String, GeneContainer> genesContainers) throws Exception {
this.modelMetabolites = Tools.readModelMetabolitesFromSBML(modelPath); //Alterar isto para ler do container directamente
reactionsToIgnore = new HashSet<>();
if(this.modelMetabolites == null) {
logger.info("Searching metabolites.txt file...");
this.modelMetabolites = FilesUtils.readWordsInFile(metabolitesPath);
this.modelMetabolites = Tools.readModelMetabolitesFromSBML(modelPath); //Alterar isto para ler do container directamente
if(this.modelMetabolites.isEmpty()) { //quick fix to not brake generic method
this.modelMetabolites = null;
logger.warn("Filter by compounds in model disabled!");
}
}
if(this.modelMetabolites == null) {
logger.info("Searching metabolites.txt file...");
this.modelMetabolites = FilesUtils.readWordsInFile(metabolitesPath);
if(this.modelMetabolites != null)
logger.info("Model metabolites size: " + this.modelMetabolites.size());
if(this.modelMetabolites.isEmpty()) { //quick fix to not brake generic method
this.modelMetabolites = null;
logger.warn("Filter by compounds in model disabled!");
}
}
logger.debug("Searching reactions...");
if(this.modelMetabolites != null)
logger.info("Model metabolites size: " + this.modelMetabolites.size());
// resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
logger.debug("Searching reactions...");
Set<String> tcNumbers = identifyTcNumbersForSearch();
Set<String> tcNumbers = identifyTcNumbersForSearch();
logger.info("Searching reactions in {} tcNumbers", tcNumbers.size());
// this.subunits.setSubunitsDatabase(findSubunitsInDatabase(tcNumbers));
reactionsByTcNumber = getReactionsByTcNumber(tcNumbers);
logger.info("Searching reactions in {} tcNumbers", tcNumbers.size());
if(reactionContainersByID.keySet().size() == 0 && modelMetabolites != null)
logger.warn("No metabolites present in the model are available in the selected reactions!");
reactionsByTcNumber = getReactionsByTcNumber(tcNumbers);
Map<String, Set<String>> similaritiesResults = new HashMap<>(); //the results of this map will be sorted by similarity
// System.out.println(reactionsByTcNumber);
if(!properties.isIgnoreMethod2())
similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(genesContainers,
reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore, properties);
if(reactionContainersByID.keySet().size() == 0 && modelMetabolites != null)
logger.warn("No metabolites present in the model are available in the selected reactions!");
// for(String tc : reactionsByTcNumber.keySet())
// System.out.println(tc + "\t" + reactionsByTcNumber.get(tc));
Map<String, GeneContainer> genesContainers = buildGenesContainers();
// System.out.println("Main Reactions -> " + mainReactions.size());
//
// for(String key : mainReactions.keySet())
// System.out.println(key + "\t" + mainReactions.get(key));
// String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
Map<String, Set<String>> similaritiesResults = new HashMap<>(); //the results of this map will be sorted by similarity
// generateFinalResults(similaritiesResults);
if(!properties.isIgnoreMethod2())
similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(genesContainers,
reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore, properties);
generateFinalResultsAuxValidation_filter_reactions(similaritiesResults, genesContainers);
// for(String tc : reactionsByTcNumber.keySet())
// System.out.println(tc + "\t" + reactionsByTcNumber.get(tc));
Reports.saveReportByEvalue(resultPath, reportByEvalue, reportByEvalueAux);
// System.out.println(finalResults);
// String path = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\";
logger.debug("Reactions search complete!");
// generateFinalResults(similaritiesResults);
Map<String, Set<String>> subunitsInDatabase = service.findAllSubunitsInDatabase();
generateFinalResultsAuxValidation_filter_reactions(similaritiesResults, genesContainers);
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(subunitsInDatabase,
blastResults, genesContainers, properties);
Reports.saveReportByEvalue(resultPath, reportByEvalue, reportByEvalueAux);
// for(String key : finalResults.keySet())
// for(String newkey : finalResults.get(key).keySet())
// System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey));
// System.out.println(finalResults);
logger.debug("Reactions search complete!");
Map<String, String> geneRules = GPRAssociations.buildGeneRules(resultPath, service, proteinComplexes, finalResults,
subunitsInDatabase, this.hprHomologues, this.phosphotransferaseHomologues, this.finalResults.keySet());
Map<String, Set<String>> subunitsInDatabase = service.findAllSubunitsInDatabase();
// System.out.println("here");
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(subunitsInDatabase,
blastResults, genesContainers, properties);
OutputTransytFormat output = new OutputTransytFormat(resultPath + "/reactions_references.txt", finalResults, metabolitesFormulas,
reactionContainersByID, geneRules, metabolitesNames);
// for(String key : finalResults.keySet())
// for(String newkey : finalResults.get(key).keySet())
// System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey));
Container container = new Container(output);
// countReactionsByGene(geneRules, container.getReactions());
Map<String, String> geneRules = GPRAssociations.buildGeneRules(resultPath, service, proteinComplexes, finalResults,
subunitsInDatabase, this.hprHomologues, this.phosphotransferaseHomologues, this.finalResults.keySet());
//
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
//
//
//
//String fileName = "sbmlResult".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".xml");
//
// System.out.println("here");
String sbmlPath = resultPath.concat("/transyt.xml");
File fileResults = new File(sbmlPath);
OutputTransytFormat output = new OutputTransytFormat(finalResults, metabolitesFormulas, reactionContainersByID, geneRules, metabolitesNames);
TransytSBMLLevel3Writer sbml = new TransytSBMLLevel3Writer(fileResults.getAbsolutePath(), container, taxonomyID, false);
// TransytSBMLLevel3Writer sbml = new TransytSBMLLevel3Writer(path.concat("SBML/").concat(fileName), container, taxonomyID, false);
Container container = new Container(output);
sbml.writeToFile();
// countReactionsByGene(geneRules, container.getReactions());
logger.info("Generating md5 checksum.");
//
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
//
//
//
//String fileName = "sbmlResult".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".xml");
//
String hash = FilesUtils.getFileChecksum(MessageDigest.getInstance("MD5"), fileResults);
String sbmlPath = resultPath.concat("/transyt.xml");
File fileResults = new File(sbmlPath);
logger.info("md5 checksum generated: {}", hash);
TransytSBMLLevel3Writer sbml = new TransytSBMLLevel3Writer(fileResults.getAbsolutePath(), container, taxonomyID, false);
// TransytSBMLLevel3Writer sbml = new TransytSBMLLevel3Writer(path.concat("SBML/").concat(fileName), container, taxonomyID, false);
FilesUtils.saveWordInFile(fileResults.getParent().concat("/checksum.md5"), hash);
sbml.writeToFile();
logger.info("Saving results in .zip file");
logger.info("Generating md5 checksum.");
System.out.println(fileResults.getParent());
System.out.println(fileResults.getAbsolutePath().concat("/results.zip"));
String hash = FilesUtils.getFileChecksum(MessageDigest.getInstance("MD5"), fileResults);
FileUtils.createZipFile(fileResults.getParent(), fileResults.getParentFile().getParent().concat("/results.zip"), 5);
logger.info("md5 checksum generated: {}", hash);
logger.info("zip file created!");
}
FilesUtils.saveWordInFile(fileResults.getParent().concat("/checksum.md5"), hash);
/**
* Annotates genes with a TC number and a short description only, without associating reactions.
*
* @param resultPath
* @param genesContainers
* @throws Exception
*/
private void annotateTransportProteinsFunction(String resultPath, Map<String, GeneContainer> genesContainers) throws Exception {
logger.info("Saving results in .zip file");
List<String[]> table = JSONFilesUtils.readTCDBScrapedInfo();
System.out.println(fileResults.getParent());
System.out.println(fileResults.getAbsolutePath().concat("/results.zip"));
Map<String, String> descriptions = ProcessTcdbMetabolites.getTCDescriptions(table);
FileUtils.createZipFile(fileResults.getParent(), fileResults.getParentFile().getParent().concat("/results.zip"), 5);
Map<String, String> annotationsByGene = new HashMap<>();
logger.info("zip file created!");
//
for(String queryAccession : this.resultsByEvalue.keySet()) {
//
// validation(container, path);
String annotation = null;
// constructSBMLKBase(container, path);
if(genesContainers.get(queryAccession).getAnnotatedFamily() != null) {
}
catch (Exception e) {
e.printStackTrace();
if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
if(tcNumber.contains(tcFamily)){
String shortDescription = descriptions.get(tcNumber).split("\\.")[0];
if(annotation != null)
annotation = annotation + "### ";
else
annotation = "";
annotation = annotation + tcNumber + " - " + shortDescription;
}
}
}
}
if(annotation != null)
annotationsByGene.put(queryAccession, annotation);
}
FilesUtils.saveMapInFile(resultPath + "transport_genes_annotation.txt", annotationsByGene);
}
private void countReactionsByGene(Map<String, String> geneRules, Map<String, ReactionCI> sbmlReactions) {
......@@ -452,35 +476,6 @@ public class ProvideTransportReactionsToGenes {
}
}
// /**
// * Merge the results of both methods
// *
// * @param similaritiesResults
// */
// private void generateFinalResults(Map<String, Map<String, Set<String>>> similaritiesResults) {
//
// finalResults = new HashMap<>(similaritiesResults);
//
// for(String queryAccession : resultsByEvalue.keySet()) {
//
// Map<String, Set<String>> res = new HashMap<>();
//
// for(String tcNumber : resultsByEvalue.get(queryAccession)) {
//
// if(reactionsByTcNumberForAnnotation.containsKey(tcNumber))
// res.put(tcNumber, reactionsByTcNumberForAnnotation.get(tcNumber));
//
// // if(reactionsByTcNumber.containsKey(tcNumber))
// // res.put(tcNumber, reactionsByTcNumber.get(tcNumber));
//
// }
//
//
//
// finalResults.put(queryAccession, res);
// }
// }
private Map<String, Set<String>> getReactionsForGenesByEvalue() {
Map<String, Set<String>> results = new LinkedHashMap<>();
......@@ -798,7 +793,7 @@ public class ProvideTransportReactionsToGenes {
private void generateFinalResultsAuxValidation_filter_reactions(Map<String, Set<String>> similaritiesResults,
Map<String, GeneContainer> genesContainers) {
// Map<String, String> similaritiesReport = new HashMap<>();
// Map<String, String> similaritiesReport = new HashMap<>();
Set<String> compounds = new HashSet<>();
......@@ -808,29 +803,29 @@ public class ProvideTransportReactionsToGenes {
resultsByEvalue = new HashMap<String, Set<String>>();
for(String queryAccession : blastResults.keySet()) {
// if(queryAccession.equals("b3201"))
// if(queryAccession.equals("b4321"))
// System.out.println();
Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>();
Map<String, Set<String>> res = new HashMap<>();
boolean save = false;
Map<String, TypeOfTransporter> transportTypes = new HashMap<>();
if(genesContainers.get(queryAccession).getAnnotatedFamily() != null) {
TypeOfTransporter transportType = null;
if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
if(reactionsByTcNumberForAnnotation.containsKey(tcNumber) && tcNumber.contains(tcFamily)){
Set<String> reactions = reactionsByTcNumberForAnnotation.get(tcNumber);
......@@ -865,20 +860,20 @@ public class ProvideTransportReactionsToGenes {
}
}
}
for(String tcNumber : genesContainers.get(queryAccession).getClosestTCnumbers()) { //this list is sorted by similarity
transportType = transportTypes.get(tcNumber);
if(transportType != null)
break;
}
if(res.size() > 0) {
String pattern = ".*T.[" + TypeOfTransporter.getTransportTypeID(TypeOfTransporter.BiochemicalATP).toString() + "-9].*";
if(!reactionsAlreadyAssigned.toString().matches(pattern)) {
String regex = ".*T." + TypeOfTransporter.getTransportTypeID(transportType).toString() + ".*";
for(String tc : new HashMap<>(res).keySet()) {
......@@ -908,28 +903,27 @@ public class ProvideTransportReactionsToGenes {
String id = iterator.next();
if(!reactionsAlreadyAssigned.contains(id) &&