GitLab CI for Docker builds is enabled! You can turn it on for your project by adding a .gitlab-ci.yml file. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 290a4d49 authored by Davide Lagoa
Browse files

refactored classification method

parent 67bffede
......@@ -107,6 +107,36 @@
<artifactId>neo4j-java-driver</artifactId>
<version>4.0.0</version>
</dependency>
<!--
  TranSyT validation module.
  The exclusions below drop the logback and log4j-slf4j-impl logging artifacts, the ChEBI
  web-service client and axis-saaj from this module's transitive dependencies.
  NOTE(review): presumably this avoids clashing logging bindings / unused web-service
  clients on the classpath; confirm before removing any exclusion.
-->
<dependency>
	<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
	<artifactId>validation</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<exclusions>
		<exclusion>
			<groupId>ch.qos.logback</groupId>
			<artifactId>logback-classic</artifactId>
		</exclusion>
		<exclusion>
			<groupId>ch.qos.logback</groupId>
			<artifactId>logback-core</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-slf4j-impl</artifactId>
		</exclusion>
		<exclusion>
			<!-- groupId kept on a single line so tools that do not trim element text still match it -->
			<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
			<artifactId>chebiWS-client</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.axis</groupId>
			<artifactId>axis-saaj</artifactId>
		</exclusion>
	</exclusions>
</dependency>
<dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
......
......@@ -340,10 +340,10 @@ public class BlastAlignmentTransyt extends Observable implements ModelAlignments
// this.alignments.put(queryID,iterationAlignments);
}
else{
logger.debug(iteration.getIteration().getIterationMessage().concat(" for {}"), queryID);
}
// else{ //Annoying irrelevant message
//
// logger.debug(iteration.getIteration().getIterationMessage().concat(" for {}"), queryID);
// }
}
else{
if(this.sequencesWithoutSimilarities!=null && this.sequencesWithoutSimilarities.contains(queryID)) {
......
......@@ -25,6 +25,7 @@ import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.blast.ncbi_blastparser.BlastOutput;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.transyt.utilities.files.ReadFasta;
/**
* @author ODias
......@@ -121,7 +122,7 @@ public class RunSimilaritySearchTransyt extends Observable implements Observer {
if(!f.exists())
f.mkdir();
CreateGenomeFile.buildSubFastaFiles(path, this.querySequences, queriesSubSetList, queryFilesPaths, numberOfCores);
ReadFasta.buildSubFastaFiles(path, this.querySequences, queriesSubSetList, queryFilesPaths, numberOfCores);
ConcurrentLinkedQueue<AlignmentCapsule> alignmentContainerSet = new ConcurrentLinkedQueue<>();
JAXBContext jc = JAXBContext.newInstance(BlastOutput.class);
......
......@@ -28,7 +28,7 @@ public class InternaldbMetabolites {
public static Set<String> getAllMetabololites() {
List<String[]> data = ReadExcelFile.getData(FILE_PATH);
List<String[]> data = ReadExcelFile.getData(FILE_PATH, true, null);
Set<String> metabolites = new HashSet<>();
......@@ -72,7 +72,7 @@ public class InternaldbMetabolites {
public static Set<String> getAllMetabololites222() {
List<String[]> data = ReadExcelFile.getData(FILE_PATH);
List<String[]> data = ReadExcelFile.getData(FILE_PATH, true, null);
Set<String> metabolites = new HashSet<>();
......
......@@ -127,7 +127,7 @@ public class WriteByMetabolitesID {
// }
// }
Node node = service.getNodeByEntryAndLabel("META:Glucuronides", MetaboliteMajorLabel.MetaCyc);
Node node = service.getNodeByEntryAndLabel("META:CPD0-2232", MetaboliteMajorLabel.MetaCyc);
System.out.println(node.getAllProperties());
......@@ -153,10 +153,10 @@ public class WriteByMetabolitesID {
public static Map<String, Set<TcNumberContainer2>> test(BiosynthMetabolites namesAndIDsContainer, Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service,
Map<String, Set<TcNumberContainer2>> reactionsData, Properties properties) {
try {
Boolean generate = false;
String accession = "P16433";
Boolean generate = true;
String accession = "P04840";
// test2(service, null, null);
......@@ -198,6 +198,8 @@ public class WriteByMetabolitesID {
System.out.println();
}
// new PopulateTransytNeo4jDatabase(data, newData, properties);
}
// JSONFilesUtils.writeJSONTriageReactions(newData);
......@@ -245,6 +247,9 @@ public class WriteByMetabolitesID {
Set<String> synonyms = FetchCompoundsByName.getSynonyms(node, nodeProperties, service);
// if(entryID.matches("META:CPD-9781") || entryID.matches("META:CPD0-2232"))
// System.out.println();
if(entryID.matches("META:.*")) {
synonyms.add(entryID);
}
......
......@@ -39,7 +39,7 @@ public class Reports {
Map<String, TcNumberContainer> backupData = JSONFilesUtils.readDataBackupFile();
Map<String, String> descriptions = new HashMap<>();
List<String[]> data = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\Internal database\\results2.xlsx");
List<String[]> data = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\Internal database\\results2.xlsx", true, null);
for(String[] line : data) {
if(!descriptions.containsKey(line[3]))
......
......@@ -27,7 +27,7 @@ public class Tools {
Set<String> set = new HashSet<>();
List<String[]> file = ReadExcelFile.getData(path);
List<String[]> file = ReadExcelFile.getData(path, true, null);
for(String[] line : file)
set.add(line[8]);
......
......@@ -78,8 +78,10 @@ public class IdentifyReactionsMetabolites {
this.namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer);
this.allMetabolitesByName = new HashMap<>(this.namesAndIDsContainer.getMetabolitesIDs());
System.out.println(this.allMetabolitesByName.containsKey("META:CPD0-2232"));
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
//
// System.out.println("metabolites >>>" + metabolites.size()); //2078
//
......@@ -417,14 +419,14 @@ public class IdentifyReactionsMetabolites {
/**
* Method to identify metabolites present in pt.uminho.ceb.biosystems.transyt.service.reactions.
* Method to identify metabolites present in reactions.
*
* @param data
* @return
*/
private Map<String, Set<String>> getMetabolitesFromReactions(Map<String, Set<TcNumberContainer2>> data){
Map<String, Set<String>> metabolites = new HashMap<>();
Map<String, Set<String>> metabolites = new HashMap<>();
for(String accession : data.keySet()) {
......@@ -436,8 +438,9 @@ public class IdentifyReactionsMetabolites {
String[] reactions = new String[] {reactionContainer.getReaction(),
reactionContainer.getOriginalReaction()};
for(String reaction : reactions) {
for(Entry<String, Set<String>> entry : getMetabolitesToBeReplaced(reaction,
dictionary, reactionContainer.isCombineSameMetabolite()).entrySet()) {
if(metabolites.containsKey(entry.getKey()))
......@@ -464,7 +467,9 @@ public class IdentifyReactionsMetabolites {
Set<String> metabolites = new HashSet<>();
reaction = reaction.replaceAll(ReactionContainer.REV_TOKEN, "\\+").replaceAll(ReactionContainer.IRREV_TOKEN, "\\+")
.replaceAll("\\(" + ReactionContainer.INTERIOR_COMPARTMENT + "\\)", "").replaceAll("\\(" + ReactionContainer.EXTERIOR_COMPARTMENT + "\\)", "");
.replaceAll(ReactionContainer.INTERIOR_COMPARTMENT_TOKEN_REG, "")
.replaceAll(ReactionContainer.MIDDLE_COMPARTMENT_TOKEN_REG, "")
.replaceAll(ReactionContainer.EXTERIOR_COMPARTMENT_TOKEN_REG, "");
String[] metabs = reaction.split(" \\+ ");
......
......@@ -57,6 +57,8 @@ import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class ProvideTransportReactionsToGenes {
public static final String NO_TCNUMBER_ASSOCIATED = "Undefined_TCnumber";
public static final String Hpr_FAMILY = "8.A.8.";
public static final String Phosphotransferase_FAMILY = "8.A.7.";
private Map<String, List<AlignmentCapsule>> blastResults;
private RestNeo4jGraphDatabase service;
......@@ -64,6 +66,8 @@ public class ProvideTransportReactionsToGenes {
private String[] taxonomy;
private String organism;
// private Map<String, Map<String, Set<ReactionContainer>>> reactionContainers;
private Map<String, Map<String, Double>> hprHomologues = new HashMap<>();
private Map<String, Map<String, Double>> phosphotransferaseHomologues = new HashMap<>();
private Map<String, String[]> taxonomies;
private Map<String, String> organisms;
private Map<String, Set<String>> resultsByEvalue;
......@@ -90,7 +94,7 @@ public class ProvideTransportReactionsToGenes {
private Map<String, Map<String, Double>> reportByEvalue = new HashMap<>();
private Map<String, Map<String, Set<String>>> reportByEvalueAux = new HashMap<>();
private static final Map<String, Integer> GENERATIONS_EXCEPTION_FILE = FilesUtils.readGenerationsLimitFile(FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("ChildsLimits.txt"));
// private static final Map<String, Integer> GENERATIONS_EXCEPTION_FILE = FilesUtils.readGenerationsLimitFile(FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("ChildsLimits.txt"));
/** Class logger; bound to this class so that log output is attributed to ProvideTransportReactionsToGenes. */
private static final Logger logger = LoggerFactory.getLogger(ProvideTransportReactionsToGenes.class);
......@@ -103,10 +107,10 @@ public class ProvideTransportReactionsToGenes {
// homologousGenes = new HashMap<>();
// reactionContainers = new HashMap<>();
taxonomies = new HashMap<>();
organisms = new HashMap<>();
tcNumbersNotPresentInTransytDatabase = new HashSet<>();
reactionContainersByID = new HashMap<>();
this.taxonomies = new HashMap<>();
this.organisms = new HashMap<>();
this.tcNumbersNotPresentInTransytDatabase = new HashSet<>();
this.reactionContainersByID = new HashMap<>();
this.modelPath = modelPath;
this.taxonomyID = organismProperties.getTaxonomyID();
......@@ -117,7 +121,7 @@ public class ProvideTransportReactionsToGenes {
this.metabolitesNames = new HashMap<>();
this.metabolitesFormulas = new HashMap<>();
subunits = new Subunits();
this.subunits = new Subunits();
setDefaultRelationshipsToSearch();
// findTaxonomyByTaxonomyID(taxID);
......@@ -206,7 +210,8 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Searching reactions...");
resultsByEvalue = getReactionsForGenesByEvalue();
// resultsByEvalue = getReactionsForGenesByEvalue();
resultsByEvalue = getReactionsForGenesByEvalueNewMethod(); //new method
Set<String> tcNumbers = identifyTcNumbersForSearch();
......@@ -257,7 +262,8 @@ public class ProvideTransportReactionsToGenes {
// System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey));
Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults, subunitsInDatabase);
Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults,
subunitsInDatabase, this.hprHomologues, this.phosphotransferaseHomologues);
// System.out.println("here");
......@@ -534,6 +540,121 @@ public class ProvideTransportReactionsToGenes {
}
/**
 * Selects, for every query in {@code blastResults}, the TC numbers supported by its alignments.
 * <p>
 * For each alignment of a query:
 * <ul>
 * <li>hits whose TC number starts with {@link #Hpr_FAMILY} or {@link #Phosphotransferase_FAMILY}
 * are only recorded in {@code hprHomologues} / {@code phosphotransferaseHomologues}
 * (keyed by {@code tcNumber@target}) and take no further part in the selection;</li>
 * <li>hits with an e-value up to {@code properties.geteValueThreshold()} are accepted directly;</li>
 * <li>hits with an e-value up to {@code properties.getLimitEvalueAcceptance()} become candidates,
 * kept sorted by ascending e-value; the best {@code properties.getPercentageAcceptance()} percent
 * of them (rounded up) are accepted afterwards.</li>
 * </ul>
 * Side effects: resets and fills {@code reportByEvalue}, adds one entry per query to
 * {@code subunits}, and updates the two homologue maps.
 *
 * @return the accepted TC numbers of each query, in the iteration order of {@code blastResults}
 */
private Map<String, Set<String>> getReactionsForGenesByEvalueNewMethod() {

	Map<String, Set<String>> results = new LinkedHashMap<>();

	reportByEvalue = new HashMap<>();

	for(String key : blastResults.keySet()) {

		Map<String, Double> auxMap = new HashMap<>();
		Map<String, Set<String>> subunitsFound = new HashMap<>();
		Map<String, Double> evaluesEntry = new HashMap<>();
		Set<String> tcNumbers = new HashSet<>();

		// candidates above the direct-acceptance threshold: e-value by id, plus ids sorted by e-value
		Map<String, Double> notAccepted = new HashMap<>();
		List<String> positions = new ArrayList<>();

		for(AlignmentCapsule capsule : blastResults.get(key)) {

			String tcNumber = capsule.getTcdbID();
			String geneId = capsule.getTarget();
			double evalue = capsule.getEvalue();

			String auxId = tcNumber + "@" + geneId;

			if(tcNumber.startsWith(Hpr_FAMILY)) {

				Map<String, Double> hpr = this.hprHomologues.get(auxId);

				if(hpr == null) {
					hpr = new HashMap<>();
					this.hprHomologues.put(auxId, hpr);
				}

				hpr.put(key, evalue);
			}
			else if(tcNumber.startsWith(Phosphotransferase_FAMILY)) {

				Map<String, Double> phos = this.phosphotransferaseHomologues.get(auxId);

				if(phos == null) {
					phos = new HashMap<>();
					this.phosphotransferaseHomologues.put(auxId, phos);
				}

				phos.put(key, evalue);
			}
			else {

				// only the first e-value seen for a TC number is kept
				if(!evaluesEntry.containsKey(tcNumber))
					evaluesEntry.put(tcNumber, evalue);

				if(evalue <= properties.geteValueThreshold()) {
					tcNumbers.add(tcNumber);
					auxMap.put(tcNumber, evalue);
				}
				else if(evalue <= properties.getLimitEvalueAcceptance()) {

					// The latest e-value of a repeated id replaces the previous one (as before);
					// its old position is dropped so the id is listed exactly once.
					if(notAccepted.put(auxId, evalue) != null)
						positions.remove(auxId);

					// Insert before the first strictly larger e-value. Ties go after the existing
					// entries, so the list stays sorted even when e-values are equal.
					int insertAt = positions.size();

					for(int i = 0; i < positions.size(); i++) {
						if(evalue < notAccepted.get(positions.get(i))) {
							insertAt = i;
							break;
						}
					}

					positions.add(insertAt, auxId);
				}

				Set<String> accessions = subunitsFound.get(tcNumber);

				if(accessions == null) {
					accessions = new HashSet<>();
					subunitsFound.put(tcNumber, accessions);
				}

				accessions.add(geneId);
			}
		}

		// 100.0 forces floating-point division whatever the numeric type of the property;
		// the count is rounded up and capped at the number of candidates available.
		double fractionToAccept = properties.getPercentageAcceptance() / 100.0;
		int entriesToAccept = (int) Math.min(positions.size(), Math.ceil(positions.size() * fractionToAccept));

		for(int i = 0; i < entriesToAccept; i++) {

			String acceptedId = positions.get(i);
			String tcNumber = acceptedId.split("\\@")[0];

			tcNumbers.add(tcNumber);
			auxMap.put(tcNumber, notAccepted.get(acceptedId));
		}

		this.subunits.addEntry(key, evaluesEntry, subunitsFound);

		results.put(key, tcNumbers);

		if(auxMap.size() > 0)
			reportByEvalue.put(key, auxMap);
	}

	return results;
}
/**
* Merge the results of both methods
*
......@@ -544,15 +665,16 @@ public class ProvideTransportReactionsToGenes {
Map<String, GeneContainer> genesContainers) {
finalResults = new HashMap<>();
// resultsByEvalue = new HashMap<String, Set<String>>(); //DELETE ME!!!!!!!!!!!!!!1
if(this.properties.isIgnoreMethod1())
resultsByEvalue = new HashMap<String, Set<String>>();
for(String queryAccession : blastResults.keySet()) {
Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>();
Map<String, Set<String>> res = new HashMap<>();
boolean save = false;
......@@ -564,7 +686,7 @@ public class ProvideTransportReactionsToGenes {
// System.out.println("new" + tc + "\t" + reactionsByTcNumberForAnnotation.get(tc));
if(resultsByEvalue.containsKey(queryAccession)) {
String tcFamily = genesContainers.get(queryAccession).getAnnotatedFamily();
for(String tcNumber : resultsByEvalue.get(queryAccession)) {
......@@ -661,6 +783,146 @@ public class ProvideTransportReactionsToGenes {
// System.exit(0);
}
/**
 * Rebuilds {@code finalResults} by merging, for every query in {@code blastResults}, the reactions
 * reached through its accepted TC numbers ({@code resultsByEvalue}) with the reactions given in
 * {@code similaritiesResults}.
 * <p>
 * A reaction from {@code similaritiesResults} is only added when at least one of its metabolites
 * has not yet been collected from the reactions accepted through TC numbers; such reactions are
 * stored under {@link #NO_TCNUMBER_ASSOCIATED}. The set of collected metabolites is shared by all
 * queries processed in this call. When {@code properties.isIgnoreMethod1()} is set,
 * {@code resultsByEvalue} is replaced by an empty map first. Per-query TC-number results are also
 * copied into {@code reportByEvalueAux}.
 *
 * @param similaritiesResults reaction identifiers by query accession
 * @param genesContainers gene containers by query accession, read for the annotated family
 */
private void generateFinalResultsAuxValidation_filter_reactions(Map<String, Set<String>> similaritiesResults,
		Map<String, GeneContainer> genesContainers) {

	// metabolites of every reaction accepted through a TC number, accumulated over all queries
	Set<String> knownCompounds = new HashSet<>();

	finalResults = new HashMap<>();

	if(this.properties.isIgnoreMethod1())
		resultsByEvalue = new HashMap<String, Set<String>>();

	for(String queryAccession : blastResults.keySet()) {

		Set<String> assignedReactions = new HashSet<>();
		Set<String> acceptedReactions = new HashSet<>();
		Map<String, Set<String>> reactionsByTc = new HashMap<>();
		boolean hasResults = false;

		if(resultsByEvalue.containsKey(queryAccession)) {

			String annotatedFamily = genesContainers.get(queryAccession).getAnnotatedFamily();

			for(String tcNumber : resultsByEvalue.get(queryAccession)) {

				// only TC numbers with known reactions that belong to the annotated family
				if(!reactionsByTcNumberForAnnotation.containsKey(tcNumber) || !tcNumber.contains(annotatedFamily))
					continue;

				for(String reactionId : reactionsByTcNumberForAnnotation.get(tcNumber)) {

					if(assignedReactions.contains(reactionId) && !acceptedReactions.contains(reactionId))
						continue;

					assignedReactions.add(reactionId);
					acceptedReactions.add(reactionId);

					Set<String> tcReactions = reactionsByTc.get(tcNumber);

					if(tcReactions == null) {
						tcReactions = new HashSet<String>();
						reactionsByTc.put(tcNumber, tcReactions);
					}

					tcReactions.add(reactionId);

					knownCompounds.addAll(this.reactionContainersByID.get(reactionId).getMetabolites());

					hasResults = true;
				}
			}

			if(!reactionsByTc.isEmpty())
				reportByEvalueAux.put(queryAccession, new HashMap<>(reactionsByTc));
		}

		if(similaritiesResults.containsKey(queryAccession) && !similaritiesResults.get(queryAccession).isEmpty()) {

			Set<String> complementary = new HashSet<>();

			for(String reactionId : similaritiesResults.get(queryAccession)) {

				if(assignedReactions.contains(reactionId))
					continue;

				// complete the results of the first method and avoid creating bad GPRs:
				// the reaction is taken only if it brings at least one compound not seen so far
				for(String compound : this.reactionContainersByID.get(reactionId).getMetabolites()) {

					if(!knownCompounds.contains(compound)) {
						assignedReactions.add(reactionId);
						complementary.add(reactionId);
						hasResults = true;
						break;
					}
				}
			}

			if(!complementary.isEmpty())
				reactionsByTc.put(NO_TCNUMBER_ASSOCIATED, complementary);
		}

		// results are stored under the first whitespace-delimited token of the query accession
		String name = queryAccession.split("\\s+")[0];

		if(hasResults)
			finalResults.put(name, reactionsByTc);
	}
}
/**
* Merge the results of both methods
*
......@@ -1213,7 +1475,9 @@ public class ProvideTransportReactionsToGenes {
if(!tcNumbersNotPresentInTransytDatabase.contains(tcNumber)) {