GitLab CI for Docker builds is enabled! You can turn it on by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 290a4d49 authored by Davide Lagoa
Browse files

refactored classification method

parent 67bffede
...@@ -107,6 +107,36 @@ ...@@ -107,6 +107,36 @@
<artifactId>neo4j-java-driver</artifactId> <artifactId>neo4j-java-driver</artifactId>
<version>4.0.0</version> <version>4.0.0</version>
</dependency> </dependency>
<dependency>
	<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
	<artifactId>validation</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<exclusions>
		<!-- Logging implementations pulled in transitively; presumably excluded to avoid
		     conflicting SLF4J bindings on the classpath - TODO confirm. -->
		<exclusion>
			<groupId>ch.qos.logback</groupId>
			<artifactId>logback-classic</artifactId>
		</exclusion>
		<exclusion>
			<groupId>ch.qos.logback</groupId>
			<artifactId>logback-core</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-slf4j-impl</artifactId>
		</exclusion>
		<!-- Web-service client artifacts excluded from the transitive graph;
		     reason not visible here - TODO confirm. -->
		<exclusion>
			<!-- Kept on a single line so the coordinate does not depend on the parser trimming whitespace. -->
			<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
			<artifactId>chebiWS-client</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.axis</groupId>
			<artifactId>axis-saaj</artifactId>
		</exclusion>
	</exclusions>
</dependency>
<dependency> <dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId> <groupId>pt.uminho.ceb.biosystems.transyt</groupId>
......
...@@ -340,10 +340,10 @@ public class BlastAlignmentTransyt extends Observable implements ModelAlignments ...@@ -340,10 +340,10 @@ public class BlastAlignmentTransyt extends Observable implements ModelAlignments
// this.alignments.put(queryID,iterationAlignments); // this.alignments.put(queryID,iterationAlignments);
} }
else{ // else{ //Annoying irrelevant message
//
logger.debug(iteration.getIteration().getIterationMessage().concat(" for {}"), queryID); // logger.debug(iteration.getIteration().getIterationMessage().concat(" for {}"), queryID);
} // }
} }
else{ else{
if(this.sequencesWithoutSimilarities!=null && this.sequencesWithoutSimilarities.contains(queryID)) { if(this.sequencesWithoutSimilarities!=null && this.sequencesWithoutSimilarities.contains(queryID)) {
......
...@@ -25,6 +25,7 @@ import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType; ...@@ -25,6 +25,7 @@ import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method; import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.blast.ncbi_blastparser.BlastOutput; import pt.uminho.ceb.biosystems.merlin.utilities.blast.ncbi_blastparser.BlastOutput;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule; import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.transyt.utilities.files.ReadFasta;
/** /**
* @author ODias * @author ODias
...@@ -121,7 +122,7 @@ public class RunSimilaritySearchTransyt extends Observable implements Observer { ...@@ -121,7 +122,7 @@ public class RunSimilaritySearchTransyt extends Observable implements Observer {
if(!f.exists()) if(!f.exists())
f.mkdir(); f.mkdir();
CreateGenomeFile.buildSubFastaFiles(path, this.querySequences, queriesSubSetList, queryFilesPaths, numberOfCores); ReadFasta.buildSubFastaFiles(path, this.querySequences, queriesSubSetList, queryFilesPaths, numberOfCores);
ConcurrentLinkedQueue<AlignmentCapsule> alignmentContainerSet = new ConcurrentLinkedQueue<>(); ConcurrentLinkedQueue<AlignmentCapsule> alignmentContainerSet = new ConcurrentLinkedQueue<>();
JAXBContext jc = JAXBContext.newInstance(BlastOutput.class); JAXBContext jc = JAXBContext.newInstance(BlastOutput.class);
......
...@@ -28,7 +28,7 @@ public class InternaldbMetabolites { ...@@ -28,7 +28,7 @@ public class InternaldbMetabolites {
public static Set<String> getAllMetabololites() { public static Set<String> getAllMetabololites() {
List<String[]> data = ReadExcelFile.getData(FILE_PATH); List<String[]> data = ReadExcelFile.getData(FILE_PATH, true, null);
Set<String> metabolites = new HashSet<>(); Set<String> metabolites = new HashSet<>();
...@@ -72,7 +72,7 @@ public class InternaldbMetabolites { ...@@ -72,7 +72,7 @@ public class InternaldbMetabolites {
public static Set<String> getAllMetabololites222() { public static Set<String> getAllMetabololites222() {
List<String[]> data = ReadExcelFile.getData(FILE_PATH); List<String[]> data = ReadExcelFile.getData(FILE_PATH, true, null);
Set<String> metabolites = new HashSet<>(); Set<String> metabolites = new HashSet<>();
......
...@@ -127,7 +127,7 @@ public class WriteByMetabolitesID { ...@@ -127,7 +127,7 @@ public class WriteByMetabolitesID {
// } // }
// } // }
Node node = service.getNodeByEntryAndLabel("META:Glucuronides", MetaboliteMajorLabel.MetaCyc); Node node = service.getNodeByEntryAndLabel("META:CPD0-2232", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties()); System.out.println(node.getAllProperties());
...@@ -153,10 +153,10 @@ public class WriteByMetabolitesID { ...@@ -153,10 +153,10 @@ public class WriteByMetabolitesID {
public static Map<String, Set<TcNumberContainer2>> test(BiosynthMetabolites namesAndIDsContainer, Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service, public static Map<String, Set<TcNumberContainer2>> test(BiosynthMetabolites namesAndIDsContainer, Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service,
Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) { Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) {
try { try {
Boolean generate = false; Boolean generate = true;
String accession = "P16433"; String accession = "P04840";
// test2(service, null, null); // test2(service, null, null);
...@@ -198,6 +198,8 @@ public class WriteByMetabolitesID { ...@@ -198,6 +198,8 @@ public class WriteByMetabolitesID {
System.out.println(); System.out.println();
} }
// new PopulateTransytNeo4jDatabase(data, newData, properties);
} }
// JSONFilesUtils.writeJSONTriageReactions(newData); // JSONFilesUtils.writeJSONTriageReactions(newData);
...@@ -245,6 +247,9 @@ public class WriteByMetabolitesID { ...@@ -245,6 +247,9 @@ public class WriteByMetabolitesID {
Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service); Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service);
// if(entryID.matches("META:CPD-9781") || entryID.matches("META:CPD0-2232"))
// System.out.println();
if(entryID.matches("META:.*")) { if(entryID.matches("META:.*")) {
synonyms.add(entryID); synonyms.add(entryID);
} }
......
...@@ -39,7 +39,7 @@ public class Reports { ...@@ -39,7 +39,7 @@ public class Reports {
Map<String, TcNumberContainer> backupData = JSONFilesUtils.readDataBackupFile(); Map<String, TcNumberContainer> backupData = JSONFilesUtils.readDataBackupFile();
Map<String, String> descriptions = new HashMap<>(); Map<String, String> descriptions = new HashMap<>();
List<String[]> data = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\Internal database\\results2.xlsx"); List<String[]> data = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\Internal database\\results2.xlsx", true, null);
for(String[] line : data) { for(String[] line : data) {
if(!descriptions.containsKey(line[3])) if(!descriptions.containsKey(line[3]))
......
...@@ -27,7 +27,7 @@ public class Tools { ...@@ -27,7 +27,7 @@ public class Tools {
Set<String> set = new HashSet<>(); Set<String> set = new HashSet<>();
List<String[]> file = ReadExcelFile.getData(path); List<String[]> file = ReadExcelFile.getData(path, true, null);
for(String[] line : file) for(String[] line : file)
set.add(line[8]); set.add(line[8]);
......
...@@ -78,8 +78,10 @@ public class IdentifyReactionsMetabolites { ...@@ -78,8 +78,10 @@ public class IdentifyReactionsMetabolites {
this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer); this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer);
this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs()); this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs());
System.out.println(this.allMetabolitesByName.containsKey("META:CPD0-2232"));
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron")); // System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
// //
// System.out.println("metabolites >>>" + metabolites.size()); //2078 // System.out.println("metabolites >>>" + metabolites.size()); //2078
// //
...@@ -417,14 +419,14 @@ public class IdentifyReactionsMetabolites { ...@@ -417,14 +419,14 @@ public class IdentifyReactionsMetabolites {
/** /**
* Method to identify metabolites present in pt.uminho.ceb.biosystems.transyt.service.reactions. * Method to identify metabolites present in reactions.
* *
* @param data * @param data
* @return * @return
*/ */
private Map<String, Set<String>> getMetabolitesFromReactions(Map<String, Set<TcNumberContainer2>> data){ private Map<String, Set<String>> getMetabolitesFromReactions(Map<String, Set<TcNumberContainer2>> data){
Map<String, Set<String>> metabolites = new HashMap<>(); Map<String, Set<String>> metabolites = new HashMap<>();
for(String accession : data.keySet()) { for(String accession : data.keySet()) {
...@@ -436,8 +438,9 @@ public class IdentifyReactionsMetabolites { ...@@ -436,8 +438,9 @@ public class IdentifyReactionsMetabolites {
String[] reactions = new String[] {reactionContainer.getReaction(), String[] reactions = new String[] {reactionContainer.getReaction(),
reactionContainer.getOriginalReaction()}; reactionContainer.getOriginalReaction()};
for(String reaction : reactions) { for(String reaction : reactions) {
for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction, for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction,
dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) { dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) {
if(metabolites.containsKey(entry.getKey())) if(metabolites.containsKey(entry.getKey()))
...@@ -464,7 +467,9 @@ public class IdentifyReactionsMetabolites { ...@@ -464,7 +467,9 @@ public class IdentifyReactionsMetabolites {
Set<String> metabolites = new HashSet<>(); Set<String> metabolites = new HashSet<>();
reaction = reaction.replaceAll(ReactionContainer.REV_TOKEN, "\\+").replaceAll(ReactionContainer.IRREV_TOKEN, "\\+") reaction = reaction.replaceAll(ReactionContainer.REV_TOKEN, "\\+").replaceAll(ReactionContainer.IRREV_TOKEN, "\\+")
.replaceAll("\\(" + ReactionContainer.INTERIOR_COMPARTMENT + "\\)", "").replaceAll("\\(" + ReactionContainer.EXTERIOR_COMPARTMENT + "\\)", ""); .replaceAll(ReactionContainer.INTERIOR_COMPARTMENT_TOKEN_REG, "")
.replaceAll(ReactionContainer.MIDDLE_COMPARTMENT_TOKEN_REG, "")
.replaceAll(ReactionContainer.EXTERIOR_COMPARTMENT_TOKEN_REG, "");
String[] metabs = reaction.split(" \\+ "); String[] metabs = reaction.split(" \\+ ");
......
...@@ -57,6 +57,8 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties; ...@@ -57,6 +57,8 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class ProvideTransportReactionsToGenes { public class ProvideTransportReactionsToGenes {
public static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber"; public static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber";
public static final String Hpr_FAMILY = "8.A.8.";
public static final String Phosphotransferase_FAMILY = "8.A.7.";
private Map<String, List<AlignmentCapsule>> blastResults; private Map<String, List<AlignmentCapsule>> blastResults;
private RestNeo4jGraphDatabase service; private RestNeo4jGraphDatabase service;
...@@ -64,6 +66,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -64,6 +66,8 @@ public class ProvideTransportReactionsToGenes {
private String[] taxonomy; private String[] taxonomy;
private String organism; private String organism;
// private Map<String, Map<String, Set<ReactionContainer>>> reactionContainers; // private Map<String, Map<String, Set<ReactionContainer>>> reactionContainers;
private Map<String, Map<String, Double>> hprHomologues = new HashMap<>();
private Map<String, Map<String, Double>> phosphotransferaseHomologues = new HashMap<>();
private Map<String, String[]> taxonomies; private Map<String, String[]> taxonomies;
private Map<String, String> organisms; private Map<String, String> organisms;
private Map<String, Set<String>> resultsByEvalue; private Map<String, Set<String>> resultsByEvalue;
...@@ -90,7 +94,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -90,7 +94,7 @@ public class ProvideTransportReactionsToGenes {
private Map<String, Map<String, Double>> reportByEvalue = new HashMap<>(); private Map<String, Map<String, Double>> reportByEvalue = new HashMap<>();
private Map<String, Map<String, Set<String>>> reportByEvalueAux = new HashMap<>(); private Map<String, Map<String, Set<String>>> reportByEvalueAux = new HashMap<>();
private static final Map<String, Integer> GENERATIONS_EXCEPTION_FILE = FilesUtils.readGenerationsLimitFile(FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("ChildsLimits.txt")); // private static final Map<String, Integer> GENERATIONS_EXCEPTION_FILE = FilesUtils.readGenerationsLimitFile(FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("ChildsLimits.txt"));
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class); private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
...@@ -103,10 +107,10 @@ public class ProvideTransportReactionsToGenes { ...@@ -103,10 +107,10 @@ public class ProvideTransportReactionsToGenes {
// homologousGenes = new HashMap<>(); // homologousGenes = new HashMap<>();
// reactionContainers = new HashMap<>(); // reactionContainers = new HashMap<>();
taxonomies = new HashMap<>(); this.taxonomies = new HashMap<>();
organisms = new HashMap<>(); this.organisms = new HashMap<>();
tcNumbersNotPresentInTransytDatabase = new HashSet<>(); this.tcNumbersNotPresentInTransytDatabase = new HashSet<>();
reactionContainersByID = new HashMap<>(); this.reactionContainersByID = new HashMap<>();
this.modelPath = modelPath; this.modelPath = modelPath;
this.taxonomyID = organismProperties.getTaxonomyID(); this.taxonomyID = organismProperties.getTaxonomyID();
...@@ -117,7 +121,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -117,7 +121,7 @@ public class ProvideTransportReactionsToGenes {
this.metabolitesNames = new HashMap<>(); this.metabolitesNames = new HashMap<>();
this.metabolitesFormulas = new HashMap<>(); this.metabolitesFormulas = new HashMap<>();
subunits = new Subunits(); this.subunits = new Subunits();
setDefaultRelationshipsToSearch(); setDefaultRelationshipsToSearch();
// findTaxonomyByTaxonomyID(taxID); // findTaxonomyByTaxonomyID(taxID);
...@@ -206,7 +210,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -206,7 +210,8 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Searching reactions..."); logger.debug("Searching reactions...");
resultsByEvalue = getReactionsForGenesByEvalue(); // resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
Set<String> tcNumbers = identifyTcNumbersForSearch(); Set<String> tcNumbers = identifyTcNumbersForSearch();
...@@ -257,7 +262,8 @@ public class ProvideTransportReactionsToGenes { ...@@ -257,7 +262,8 @@ public class ProvideTransportReactionsToGenes {
// System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey)); // System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey));
Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults, subunitsInDatabase); Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults,
subunitsInDatabase, this.hprHomologues, this.phosphotransferaseHomologues);
// System.out.println("here"); // System.out.println("here");
...@@ -534,6 +540,121 @@ public class ProvideTransportReactionsToGenes { ...@@ -534,6 +540,121 @@ public class ProvideTransportReactionsToGenes {
} }
/**
 * Selects, for every query in {@code blastResults}, the TC numbers to keep based on the
 * e-values of its alignment hits.
 * <p>
 * For each hit:
 * <ul>
 * <li>hits whose TC number starts with {@link #Hpr_FAMILY} or {@link #Phosphotransferase_FAMILY}
 * are only recorded in {@code hprHomologues} / {@code phosphotransferaseHomologues}
 * (keyed by {@code tcNumber@target}) and take no further part in the selection;</li>
 * <li>hits with an e-value at or below {@code properties.geteValueThreshold()} are accepted directly;</li>
 * <li>hits above that threshold but at or below {@code properties.getLimitEvalueAcceptance()}
 * are kept as candidates, sorted by ascending e-value; the best
 * {@code properties.getPercentageAcceptance()} percent of them (rounded up) are accepted afterwards.</li>
 * </ul>
 * As side effects, {@code reportByEvalue} is reset and filled with the accepted TC numbers and
 * their e-values per query, and {@code subunits} receives one entry per query.
 *
 * @return the accepted TC numbers per query identifier, in the iteration order of {@code blastResults}
 */
private Map<String, Set<String>> getReactionsForGenesByEvalueNewMethod() {

	Map<String, Set<String>> results = new LinkedHashMap<>();

	reportByEvalue = new HashMap<>();

	for(Map.Entry<String, List<AlignmentCapsule>> queryHits : blastResults.entrySet()) {

		String key = queryHits.getKey();

		Map<String, Double> auxMap = new HashMap<>();
		Map<String, Set<String>> subunitsFound = new HashMap<>();
		Map<String, Double> evaluesEntry = new HashMap<>();
		Set<String> tcNumbers = new HashSet<>();

		// Candidates between the two thresholds: e-value by "tcNumber@target" plus the
		// same identifiers kept sorted by ascending e-value.
		Map<String, Double> notAccepted = new HashMap<>();
		List<String> positions = new ArrayList<>();

		for(AlignmentCapsule capsule : queryHits.getValue()) {

			String tcNumber = capsule.getTcdbID();
			String geneId = capsule.getTarget();
			double evalue = capsule.getEvalue();

			String auxId = tcNumber + "@" + geneId;

			if(tcNumber.startsWith(Hpr_FAMILY)) {
				this.hprHomologues.computeIfAbsent(auxId, id -> new HashMap<>()).put(key, evalue);
			}
			else if(tcNumber.startsWith(Phosphotransferase_FAMILY)) {
				this.phosphotransferaseHomologues.computeIfAbsent(auxId, id -> new HashMap<>()).put(key, evalue);
			}
			else {
				// Only the first e-value seen for a TC number is kept.
				evaluesEntry.putIfAbsent(tcNumber, evalue);

				if(evalue <= properties.geteValueThreshold()) {
					tcNumbers.add(tcNumber);
					auxMap.put(tcNumber, evalue);
				}
				else if(evalue <= properties.getLimitEvalueAcceptance()) {
					notAccepted.put(auxId, evalue);
					insertSortedByEvalue(positions, notAccepted, auxId);
				}

				// Targets are grouped by TC number regardless of the e-value.
				subunitsFound.computeIfAbsent(tcNumber, tc -> new HashSet<>()).add(geneId);
			}
		}

		// Floating-point division so an integer percentage is not truncated to zero;
		// clamped so a percentage above 100 cannot index past the candidate list.
		int entriesToAccept = (int) Math.min(positions.size(),
				Math.ceil(positions.size() * (properties.getPercentageAcceptance() / 100.0))); //round up

		for(int i = 0; i < entriesToAccept; i++) {

			String candidate = positions.get(i);
			String tcNumber = candidate.split("\\@")[0];

			tcNumbers.add(tcNumber);
			auxMap.put(tcNumber, notAccepted.get(candidate));
		}

		this.subunits.addEntry(key, evaluesEntry, subunitsFound);

		results.put(key, tcNumbers);

		if(auxMap.size() > 0)
			reportByEvalue.put(key, auxMap);
	}

	return results;
}

/**
 * Places {@code auxId} in {@code positions} so that the list stays sorted by ascending
 * e-value, using the e-values stored in {@code notAccepted}.
 * <p>
 * A previous occurrence of the identifier is removed first, so the list never holds
 * duplicates or an entry ordered by an outdated e-value. Identifiers with equal
 * e-values keep their insertion order.
 *
 * @param positions identifiers sorted by ascending e-value (modified in place)
 * @param notAccepted e-value of every identifier in {@code positions}, including {@code auxId}
 * @param auxId identifier to insert
 */
private static void insertSortedByEvalue(List<String> positions, Map<String, Double> notAccepted, String auxId) {

	positions.remove(auxId);

	double evalue = notAccepted.get(auxId);

	int index = 0;
	while(index < positions.size() && notAccepted.get(positions.get(index)) <= evalue)
		index++;

	positions.add(index, auxId);
}
/** /**
* Merge the results of both methods * Merge the results of both methods
* *
...@@ -544,15 +665,16 @@ public class ProvideTransportReactionsToGenes { ...@@ -544,15 +665,16 @@ public class ProvideTransportReactionsToGenes {
Map<String, GeneContainer> genesContainers) { Map<String, GeneContainer> genesContainers) {
finalResults = new HashMap<>(); finalResults = new HashMap<>();
// resultsByEvalue = new HashMap<String, Set<String>>(); //DELETE ME!!!!!!!!!!!!!!1 if(this.properties.isIgnoreMethod1())
resultsByEvalue = new HashMap<String, Set<String>>();
for(String queryAccession : blastResults.keySet()) { for(String queryAccession : blastResults.keySet()) {
Set<String> reactionsAlreadyAssigned = new HashSet<>(); Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>(); Set<String> accepted = new HashSet<>();
Map<String, Set<String>> res = new HashMap<>(); Map<String, Set<String>> res = new HashMap<>();
boolean save = false; boolean save = false;
...@@ -564,7 +686,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -564,7 +686,7 @@ public class ProvideTransportReactionsToGenes {
// System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc)); // System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc));
if(resultsByEvalue.containsKey(queryAccession)) { if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily(); String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {