GitLab CI for Docker builds is enabled! You can use it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit e9336c6f authored by Davide Lagoa
Browse files

properties refactoring

parent e3ad0c9c
......@@ -4,7 +4,7 @@
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>service</artifactId>
<name>service</name>
<version>0.0.7-SNAPSHOT</version>
<version>0.0.10-SNAPSHOT</version>
<build>
<plugins>
<plugin>
......
......@@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>service</artifactId>
<version>0.0.7-SNAPSHOT</version>
<version>0.0.10-SNAPSHOT</version>
<properties>
......@@ -111,7 +111,7 @@
<dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>utilities</artifactId>
<version>0.0.7-SNAPSHOT</version>
<version>0.0.10-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
......@@ -141,7 +141,7 @@
<dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>scraper</artifactId>
<version>0.0.7-SNAPSHOT</version>
<version>0.0.10-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>uk.ac.ebi.uniprot</groupId>
......
......@@ -2,7 +2,6 @@ package pt.uminho.ceb.biosystems.transyt.service.internalDB;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
......@@ -15,8 +14,6 @@ import org.slf4j.LoggerFactory;
import com.google.common.collect.Iterators;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import pt.uminho.ceb.biosystems.transyt.scraper.APIs.ChebiAPI;
import pt.uminho.ceb.biosystems.transyt.service.containers.BiosynthMetabolites;
import pt.uminho.ceb.biosystems.transyt.service.utilities.FileUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
......@@ -25,12 +22,6 @@ import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetabolitePropertyLabel;
import pt.uminho.sysbio.biosynth.integration.neo4j.BiodbMetaboliteNode;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
public class FetchCompoundsByName {
......
......@@ -50,8 +50,8 @@ public class WriteByMetabolitesID {
Map<String, BiosynthMetaboliteProperties> data = getBiosynthDBData(service);
// Map<String, BiosynthMetaboliteProperties> data = null;
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
// @SuppressWarnings("resource")
// Scanner reader = new Scanner(System.in);
// System.out.println("Writing Excel...");
// WriteExcel.writeNeo4jKeggInfo(data);
......@@ -77,27 +77,27 @@ public class WriteByMetabolitesID {
Map<String, Set<TcNumberContainer2>> newData = test(namesAndIDsContainer, data, service, reactionsData, properties); //uncomment
int n = 1;
while (n != 99) {
n = 99;
// int n = 1;
//
// while (n != 99) {
//
// n = 99;
// test2(service, data, namesAndIDsContainer);
new PopulateTransytNeo4jDatabase(data, newData, properties); //uncomment
//
System.out.println("Enter a random number to repeat (100 to repeat data retrieval) or 99 to finish: ");
try {
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
}
// System.out.println("Enter a random number to repeat (100 to repeat data retrieval) or 99 to finish: ");
//
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
dataTx.failure();
dataTx.close();
service.shutdown();
......
......@@ -71,7 +71,8 @@ public class Tools {
}
}
catch (Exception e) {
logger.error("An error occurred while reading the model!!!! Searching metabolites.txt file...");
logger.error("An error occurred while reading the model!!!!");
allMetabolites = null;
// e.printStackTrace();
}
......
......@@ -83,20 +83,19 @@ public class ProvideTransportReactionsToGenes {
private TransytRelationshipType defaultRelationshipProducts;
private TransytNodeLabel defaultMetaboliteLabel;
private Subunits subunits;
private String modelPath = ""; //DELETE ME
private String modelPath = "";
private Integer taxonomyID = null;
private Map<String, String> metabolitesNames;
private Map<String, String> metabolitesFormulas;
private Map<String, Map<String, Double>> reportByEvalue = new HashMap<>();
private Map<String, Map<String, Set<String>>> reportByEvalueAux = new HashMap<>();
private boolean overrideCommonOntology = false;
private static final Map<String, Integer> GENERATIONS_EXCEPTION_FILE = FilesUtils.readGenerationsLimitFile(FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("ChildsLimits.txt"));
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
public ProvideTransportReactionsToGenes(String workFolderID, Organism organismProperties, String queryPath, String modelPath,
String metabolitesPath, String resultPath, Properties properties, boolean forceBlast, boolean overrideCommonOntology) {
String metabolitesPath, String resultPath, Properties properties, boolean forceBlast) {
logger.info("TranSyT initialized!");
......@@ -109,7 +108,6 @@ public class ProvideTransportReactionsToGenes {
tcNumbersNotPresentInTransytDatabase = new HashSet<>();
reactionContainersByID = new HashMap<>();
this.modelPath = modelPath;
this.overrideCommonOntology = overrideCommonOntology;
this.taxonomyID = organismProperties.getTaxonomyID();
this.taxonomy = organismProperties.getTaxonomy();
......@@ -191,12 +189,20 @@ public class ProvideTransportReactionsToGenes {
reactionsToIgnore = new HashSet<>();
// tests();
modelMetabolites = Tools.readModelMetabolitesFromSBML(modelPath); //Alterar isto para ler do container directamente
this.modelMetabolites = Tools.readModelMetabolitesFromSBML(modelPath); //Alterar isto para ler do container directamente
if(modelMetabolites == null || modelMetabolites.isEmpty())
modelMetabolites = FilesUtils.readWordsInFile(metabolitesPath);
logger.info("Model metabolites size: " + modelMetabolites.size());
if(this.modelMetabolites == null) {
logger.info("Searching metabolites.txt file...");
this.modelMetabolites = FilesUtils.readWordsInFile(metabolitesPath);
if(this.modelMetabolites.isEmpty()) { //quick fix to not brake generic method
this.modelMetabolites = null;
logger.warn("Filter by compounds in model disabled!");
}
}
if(this.modelMetabolites != null)
logger.info("Model metabolites size: " + this.modelMetabolites.size());
logger.debug("Searching reactions...");
......@@ -223,7 +229,7 @@ public class ProvideTransportReactionsToGenes {
// System.out.println(key + "\t" + mainReactions.get(key));
Map<String, Set<String>> similaritiesResults = ReactionsPredictor.getReactionsForGenesBySimilarities(resultPath, genesContainers,
reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore);
reactionsByTcNumber, blastResults, mainReactions, reactionsToIgnore, properties);
// for(String tc : reactionsByTcNumber.keySet())
// System.out.println(tc + "\t" + reactionsByTcNumber.get(tc));
......@@ -1163,7 +1169,6 @@ public class ProvideTransportReactionsToGenes {
TransytNode tcNumberNode = tcNodes.get(tcNumber);
Map<String, Boolean> reversibility = new HashMap<>();
Map<String, String> irreversibleIDs = new HashMap<>();
if(!mainReactions.containsKey(tcNumber) && tcNumberNode.hasProperty(TransytGeneralProperties.MainReactionsIDs)) {
......@@ -1180,7 +1185,7 @@ public class ProvideTransportReactionsToGenes {
for(TransytRelationship rel : relations) {
if(overrideCommonOntology || Boolean.valueOf(rel.getProperty(TransytGeneralProperties.CommonOntology))) {
if(properties.isOverrideCommonOntologyFilter() || Boolean.valueOf(rel.getProperty(TransytGeneralProperties.CommonOntology))) {
TransytNode node = rel.getOtherEndNode();
......@@ -1479,7 +1484,8 @@ public class ProvideTransportReactionsToGenes {
similaritySum = similaritySum + capsule.getBitScore();
}
String tcFamily = ReactionsPredictor.annotateTcFamily(totalEntries, similaritySum, familiesFrequency, familiesSimilarity);
String tcFamily = ReactionsPredictor.annotateTcFamily(totalEntries, similaritySum, familiesFrequency,
familiesSimilarity, properties);
tcFamilies.add(tcFamily);
......
package pt.uminho.ceb.biosystems.transyt.service.reactions;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
......@@ -12,21 +11,15 @@ import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.GeneContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class ReactionsPredictor {
private static final Double ALPHA = 0.8;
private static final Double BETA = 0.3;
private static final Integer MINIMUM_HITS = 2;
private static final Double THRESHOLD = 0.5;
private static final Double W = 0.4;
private static final Logger logger = LoggerFactory.getLogger(ReactionsPredictor.class);
public static Map<String, Set<String>> getReactionsForGenesBySimilarities(String reportPath, Map<String, GeneContainer> data, Map<String, Set<String>> reactionsByTcNumber,
Map<String, List<AlignmentCapsule>> blastResults, Map<String, List<String>> mainReactions, Set<String> reactionsToIgnore) {
Map<String, List<AlignmentCapsule>> blastResults, Map<String, List<String>> mainReactions, Set<String> reactionsToIgnore, Properties properties) {
Map<String, Set<String>> results = new HashMap<>();
......@@ -35,16 +28,13 @@ public class ReactionsPredictor {
int lastProgress = -1;
int current = 0;
// Map<String, GeneContainer> data2 = new HashMap<>();
//
// data2.put("YP_003535950.1", data.get("YP_003535950.1")); //YP_003535791.1 //YP_003536402.1 -- 3.A.1.7.5
Map<String, String> similaritiesReport = new HashMap<>();
for(String queryAccession : data.keySet()) {
try {
Map<String, String> reactions = findTransportReactions(data.get(queryAccession), reactionsByTcNumber, blastResults.get(queryAccession), mainReactions, reactionsToIgnore);
Map<String, String> reactions = findTransportReactions(data.get(queryAccession), reactionsByTcNumber,
blastResults.get(queryAccession), mainReactions, reactionsToIgnore, properties);
results.put(queryAccession, reactions.keySet());
......@@ -82,12 +72,9 @@ public class ReactionsPredictor {
* @param geneContainer
*/
private static Map<String, String> findTransportReactions(GeneContainer geneContainer, Map<String, Set<String>> reactionsByTcNumber, List<AlignmentCapsule> blastResults,
Map<String, List<String>> mainReactions, Set<String> reactionsToIgnore) {
// Map<String, Set<String>> results = new HashMap<>();
Map<String, List<String>> mainReactions, Set<String> reactionsToIgnore, Properties properties) {
Map<String, Integer> hits = new HashMap<>();
// Set<String> allReactions = new HashSet<>();
Map<String, Double> reactionsSimilarity = new HashMap<>();
......@@ -128,17 +115,6 @@ public class ReactionsPredictor {
}
}
// for(String key : similarities.keySet())
// total = total + similarities.get(key);
// System.out.println(setReactionContainers.size());
// Set<String> allReactions = getAllReactions(reactionsByTcNumber);
// double similaritySum = getSimilaritySum(geneContainer.getSimilarities(), blastResults, mainReactions, reactionsByTcNumber);
// Map<String, String> reactionIDsMap = buildIDsMap(reactionsByTcNumber);
int tMax = geneContainer.getMaxTax();
Map<String, String> res = new HashMap<>();
......@@ -150,25 +126,22 @@ public class ReactionsPredictor {
if(!reactionsToIgnore.contains(reactionID)) {
int hit = hits.get(reactionID);
// System.out.println(commonTaxa);
if(hit < MINIMUM_HITS)
gap = MINIMUM_HITS - hit;
if(hit < properties.getMinimumHits())
gap = properties.getMinimumHits() - hit;
// System.out.println(gap);
// System.out.println(hits);
// System.out.println("freqScore = " + reactionsSimilarity.get(reactionID) + " / " + similaritySum);
// System.out.println("taxScore = " + commonTaxaSum.get(reactionID) + " * " + (1-gap * BETA)+ " / " + hit * tMax);
double freqScore = reactionsSimilarity.get(reactionID) / similaritySum;
double taxScore = commonTaxaSum.get(reactionID) * (1-gap * BETA) / (hit * tMax);
double taxScore = commonTaxaSum.get(reactionID) * (1-gap * properties.getBeta()) / (hit * tMax);
Double score = ALPHA * freqScore + (1-ALPHA) * taxScore;
Double score = properties.getAlpha() * freqScore + (1-properties.getAlpha()) * taxScore;
// System.out.println(reactionID + " ---->>> \t" + ALPHA + " * " + freqScore + " + (1 - " + ALPHA + " ) * " + taxScore + " = " + score) ;
// System.out.println(reactionID + " ---->>> \t" + ALPHA + " * " + freqScore + " + (1 - " + ALPHA + " ) * " + taxScore + " = " + score) ;
if(score > THRESHOLD) {
if(score > properties.getReactionsAnnotationScoreThreshold()) {
// System.out.println(reactionID + "\t" + score);
//
......@@ -193,15 +166,15 @@ public class ReactionsPredictor {
* @return
*/
public static String annotateTcFamily(int totalEntries, double similaritySum, Map<String, Integer> familiesFrequency,
Map<String, Double> familiesSimilarity) {
Map<String, Double> familiesSimilarity, Properties properties) {
String annotation = "";
double value = 0.0;
for(String tcF : familiesFrequency.keySet()) {
double score = (familiesFrequency.get(tcF) / totalEntries) * W
+ (familiesSimilarity.get(tcF) / similaritySum) * (1 - W);
double score = (familiesFrequency.get(tcF) / totalEntries) * properties.getAlphaFamiliesAnnotation()
+ (familiesSimilarity.get(tcF) / similaritySum) * (1 - properties.getAlphaFamiliesAnnotation());
if(score > value) {
annotation = tcF;
......@@ -211,120 +184,4 @@ public class ReactionsPredictor {
return annotation;
}
// /**
// * Counts the number of hits of a metabolite for a given set of reaction containers.
// *
// * @param metabolite
// * @param transport
// * @param setReactionContainers
// * @param blastResults
// * @return
// */
// private static Double[] countReactionHitsAndSimilaritySum(String reactionID, Map<String, Set<ReactionContainer>> reactionContainers,
// List<AlignmentCapsule> blastResults, GeneContainer geneContainer) {
//
// Set<String> tcNumberMatch = new HashSet<>();
//
// Double similarity = 0.0;
// Double commonTaxa = 0.0;
//
// for(String tcNumber : reactionContainers.keySet()) {
// for(ReactionContainer container : reactionContainers.get(tcNumber)) {
//
// if(container.getReactionID().equals(reactionID)) {
// tcNumberMatch.add(container.getTcNumber());
// }
// }
// }
//
// for(AlignmentCapsule capsule : blastResults) {
//
// if(tcNumberMatch.contains(capsule.getTcdbID())) {
//
// similarity = similarity + capsule.getScore();
// commonTaxa = commonTaxa + geneContainer.getCommomTaxaCount().get(capsule.getTarget());
//
// }
// }
//
// Double[] results = new Double[2];
//
// results[0] = similarity;
// results[1] = commonTaxa;
//
// return results;
// }
// /**
// * Method to retrieve the metabolites of all pt.uminho.ceb.biosystems.transyt.service.reactions associated to the given containers.
// *
// * @param setReactionContainers
// * @return
// */
// private static Map<String, String> buildIDsMap(Map<String, Set<ReactionContainer>> reactionContainers) {
//
// Map<String, String> reactionsById = new HashMap<>();
//
// for(String tcNumber : reactionContainers.keySet()) {
// for(ReactionContainer container : reactionContainers.get(tcNumber)) {
//
// if(!reactionsById.containsKey(container.getReactionID()))
// reactionsById.put(container.getReactionID(), container.getReaction());
// }
// }
// return reactionsById;
// }
// /**
// * Method to retrieve the metabolites of all pt.uminho.ceb.biosystems.transyt.service.reactions associated to the given containers.
// *
// * @param reactionContainers
// * @return
// */
// private static Set<String> getAllReactions(Map<String, Set<ReactionContainer>> reactionContainers) {
//
// Set<String> allMetabolites = new HashSet<>();
//
// for(String tcNumber : reactionContainers.keySet()) {
// for(ReactionContainer container : reactionContainers.get(tcNumber))
// allMetabolites.add(container.getReactionID());
// }
//
// return allMetabolites;
// }
// /**
// * @param similarities
// * @return
// */
// private static Double getSimilaritySum(Map<String, Double> similarities, List<AlignmentCapsule> blastResults, Map<String, List<String>> mainReactions,
// Map<String, Set<ReactionContainer>> reactionContainers) {
//
// Double total = 0.0;
//
// for(AlignmentCapsule capsule : blastResults) {
//
// String tcNumber = capsule.getTcdbID();
//
// if(reactionContainers.containsKey(tcNumber)) {
//
// for(ReactionContainer container : reactionContainers.get(tcNumber)) {
//
// if(mainReactions.get(tcNumber).contains(container.getReactionID())) {
//
// total = total + similarities.get(capsule.getTarget());
//
// }
//
// }
// }
// }
// for(String key : similarities.keySet())
// total = total + similarities.get(key);
// return total;
// }
}
package pt.uminho.ceb.biosystems.transyt.service.transyt;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -12,8 +10,6 @@ import pt.uminho.ceb.biosystems.transyt.service.utilities.OrganismProperties;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.Organism;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.Enumerators.STAIN;
import pt.uminho.ceb.biosystems.transyt.utilities.enumerators.MetaboliteReferenceDatabaseEnum;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.files.JSONFilesUtils;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
/**
......@@ -31,24 +27,18 @@ public class TransyTestMain {
// String command = args[0];
// JSONFilesUtils.readJSONtcdbReactionsFile();
//
// System.exit(0);
//
Properties properties = new Properties();
String path = "/Users/davidelagoa/Desktop/ecoli/ecoli_iAF1260b/";
String taxPath = path + "taxID.txt";
List<String> params = FilesUtils.readListWordsInFile(taxPath);
Integer taxID = Integer.valueOf(params.get(0));
String filePath = path + "protein.faa";
String modelPath = path + "model.xml";
String command = "2";
String command = "1";
String resultPath = path + "results_e#1/";
......@@ -64,7 +54,7 @@ public class TransyTestMain {
logger.info("Scraper and database service option selected!");
Retriever.main(null);
WriteByMetabolitesID.start(properties);
// WriteByMetabolitesID.start(properties);
break;
......@@ -107,13 +97,13 @@ public class TransyTestMain {
properties.setDefaultLabel(MetaboliteReferenceDatabaseEnum.BiGG);
Organism organism = new OrganismProperties(taxID, properties, stain).getOrganism();
Organism organism = new OrganismProperties(properties, stain).getOrganism();
// String filePath = args[1];
// String modelPath = args[2];
new ProvideTransportReactionsToGenes(workFolderID, organism, filePath, modelPath,
metabolitesPath, resultPath, properties, false, false); //29421 - Nitrobacter vulgaris
metabolitesPath, resultPath, properties, false); //29421 - Nitrobacter vulgaris
break;
......
......@@ -24,16 +24,14 @@ public class TransytMain {
private static final Logger logger = LoggerFactory.getLogger(TransytMain.class);
public static void main(String[] args) throws InterruptedException, IOException {
public static void main(String[] args) throws Exception {
// Integer taxID = Integer.valueOf(args[0]);
logger.info("############################################ TranSyT - v0.0.7.3-SNAPSHOT ############################################");
logger.info("############################################ TranSyT - v0.0.10-SNAPSHOT ############################################");
String command = args[0];
Properties properties = new Properties();
STAIN stain = STAIN.gram_negative; //optional ---> user selection
switch (command) {
......@@ -42,7 +40,7 @@ public class TransytMain {
logger.info("Scraper and database service option selected!");
Retriever.main(null);
WriteByMetabolitesID.start(properties);
WriteByMetabolitesID.start(new Properties());
break;