GitLab CI for Docker builds is enabled! You can turn it on for your project by adding a .gitlab-ci.yml file. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit fa922128 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

GPR associations development

parent 6d18504a
......@@ -194,7 +194,7 @@ public class WriteByMetabolitesID {
// reactionsData3.put("O51235", reactionsData.get("O51235"));
System.out.println(tcdbMetabolitesIDs);
// System.out.println(tcdbMetabolitesIDs);
/////TRIAGE
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data, namesAndIDsContainer.getMetabolitesIDs(), properties).getResults(); //uncomment
......
......@@ -42,7 +42,7 @@ public class IdentifyReactionsMetabolites {
metabolites = getMetabolitesFromReactions(reactionsData).keySet();
System.out.println(metabolites);
// System.out.println(metabolites);
logger.info("Total metabolites for search: {}", metabolites.size());
......@@ -419,7 +419,7 @@ public class IdentifyReactionsMetabolites {
metabolites.add(metab.trim());
}
System.out.println(metabolites);
// System.out.println(metabolites);
return standardizationOfNames3(metabolites, dictionary);
}
......
......@@ -38,6 +38,7 @@ import kbase.Reports;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.TaxonomyContainer;
import pt.uminho.ceb.biosystems.merlin.biocomponents.io.writers.SBMLLevel3Writer;
import pt.uminho.ceb.biosystems.merlin.biocomponents.io.writers.SBMLWriter;
import pt.uminho.ceb.biosystems.merlin.database.connector.databaseAPI.TransportersAPI;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.Container;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.ContainerUtils;
......@@ -49,6 +50,7 @@ import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.exceptions.Reacti
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.ErrorsException;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLReader;
import pt.uminho.ceb.biosystems.mew.biocomponents.validation.io.JSBMLValidationException;
import relations.GPRAssociations;
import tcdb.capsules.GeneContainer;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.Subunits;
......@@ -291,7 +293,7 @@ public class ProvideTransportReactionsToGenes {
Set<String> tcNumbers = identifyTcNumbersForSearch();
this.subunits.setSubunitsDatabase(findSubunitsInDatabase(tcNumbers));
// this.subunits.setSubunitsDatabase(findSubunitsInDatabase(tcNumbers));
reactionsByTcNumber = getReactionsByTcNumber(tcNumbers);
......@@ -313,28 +315,71 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Reactions search complete!");
// Map<String, String> reactionsIDS = null;
Scanner reader = new Scanner(System.in);
Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
int n = 1;
//
// while (n != 1001) {
System.out.println("1");
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(service.findSubunitsInDatabase(tcNumbers), blastResults, genesContainers);
System.out.println("Insert a number to repeat or 1001 to finish");
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
System.out.println("2");
// reader.close();
Container container = new Container(output);
container.verifyDepBetweenClass();
System.out.println("3");
// String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
// TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
// sbml.writeToFile();
// Map<String, String> reactionsIDS = null;
validation(container, reactionsIDS, path);
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
// System.out.println("1");
//
// OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
//
// System.out.println("2");
//
// Container container = new Container(output);
// container.verifyDepBetweenClass();
// int n = 1;
//
while (n != 1001) {
try {
// GPRAssociations.buildGeneRules(service, container, proteinComplexes, reactionContainersByID);
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
}
// }
//
// System.out.println("3");
//
// // String fileName = "sbml".concat("_qCov_").concat(Double.toString(properties.getQueryCoverage())).concat("_eValThresh_").concat(Double.toString(properties.geteValueThreshold())).concat(".json");
//
// // TriageSBMLLevel3Writer sbml = new TriageSBMLLevel3Writer(path.concat("SBML\\").concat(fileName), container, taxID.toString(), false);
//
// // sbml.writeToFile();
//
// validation(container, reactionsIDS, path);
}
catch (Exception e) {
......@@ -423,38 +468,6 @@ public class ProvideTransportReactionsToGenes {
}
/**
 * Collects, for each TC number that has a node in the database, the accession
 * numbers read from the start nodes of that node's {@code has_tc} relationships.
 *
 * @param tcNumbers TC numbers to look up
 * @return map from TC number to the accession numbers linked to it; TC numbers
 *         without a node, or whose node has no {@code has_tc} relationship, are absent
 */
private Map<String, Set<String>> findSubunitsInDatabase(Set<String> tcNumbers) {
    Map<String, Set<String>> accessionsByTc = new HashMap<>();

    for (String tcNumber : tcNumbers) {
        Node tcNode = service.findTcNumberNode(tcNumber);
        if (tcNode == null) {
            continue;
        }

        for (Relationship link : tcNode.getRelationships(TriageRelationshipType.has_tc)) {
            // Read the accession first so the map is untouched if the property lookup fails.
            String accession = link.getStartNode().getProperty(TriageGeneralProperties.Accession_Number.toString()).toString();

            Set<String> linked = accessionsByTc.get(tcNumber);
            if (linked == null) {
                linked = new HashSet<>();
                accessionsByTc.put(tcNumber, linked);
            }
            linked.add(accession);
        }
    }

    return accessionsByTc;
}
/**
* Merge the results of both methods
*
......@@ -658,7 +671,7 @@ public class ProvideTransportReactionsToGenes {
}
}
String format = "_p_c.txt";
String format = "_e_p.txt";
FilesUtils.saveWordsInFile(path + "duplicates" + format, duplicate);
......@@ -945,6 +958,8 @@ public class ProvideTransportReactionsToGenes {
private Map<String, GeneContainer> buildGenesContainers() {
Map<String, GeneContainer> genes = new HashMap<>();
Set<String> tcFamilies = new HashSet<>();
for(String queryAccession : blastResults.keySet()) {
......@@ -1018,10 +1033,17 @@ public class ProvideTransportReactionsToGenes {
String tcFamily = ReactionsPredictor.annotateTcFamily(totalEntries, similaritySum, familiesFrequency, familiesSimilarity);
tcFamilies.add(tcFamily);
// transportType.put(queryAccession, getTransportType(queryAccession));
genes.put(queryAccession, new GeneContainer(sequence, homologousGenes, counts, similarities, taxonomy.length + 1, tcFamily));
}
for(String tc : tcFamilies)
System.out.println(tc);
System.out.println("FAMILIES: " + tcFamilies.size());
return genes;
}
......
package relations;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import files.FilesUtils;
import files.ReadExcelFile;
import files.WriteExcel;
import pt.uminho.ceb.biosystems.merlin.utilities.Pair;
import pt.uminho.ceb.biosystems.merlin.utilities.RulesParser;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.Container;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.components.ReactionCI;
import pt.uminho.ceb.biosystems.mew.biocomponents.container.components.ReactionTypeEnum;
import tcdb.capsules.GeneContainer;
import tcdb.capsules.ReactionContainer;
import triageDatabase.TriageGeneralProperties;
import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageRelationshipType;
import utilities.triage_utilities.Utilities;
public class GPRAssociations {
/**
 * Derives gene assignments for the TCDB complexes whose subunits are all hit by the filtered BLAST results.
 * <p>
 * Alignments are first narrowed with {@code filterResults}. Every remaining hit whose target accession
 * belongs to a complex with more than one subunit is recorded as
 * TC number -> subunit accession -> query gene -> bit score. For each TC number where the number of
 * recorded accessions equals the number of subunits in {@code complexesTCDB}, {@code findBestSubunits}
 * is asked for an assignment and its result is stored. Progress is traced on standard output.
 *
 * @param complexesTCDB   TC number -> accessions of the complex subunits
 * @param blastResults    query gene -> alignments
 * @param genesContainers query gene -> gene container, used by {@code filterResults}
 * @return TC number -> assignment returned by {@code findBestSubunits} (query gene -> subunit accession);
 *         the value is {@code null} when {@code findBestSubunits} returned {@code null}
 */
public static Map<String, Map<String, String>> getGPR(Map<String, Set<String>> complexesTCDB, Map<String, List<AlignmentCapsule>> blastResults, Map<String, GeneContainer> genesContainers) {

    System.out.println("comeou");

    Map<String, List<AlignmentCapsule>> hitsByGene = filterResults(blastResults, genesContainers);

    System.out.println(hitsByGene.size());

    // TC number -> subunit accession -> query gene -> bit score
    Map<String, Map<String, Map<String, Double>>> scoresByTc = new HashMap<>();

    for (Map.Entry<String, List<AlignmentCapsule>> geneHits : hitsByGene.entrySet()) {
        String gene = geneHits.getKey();

        for (AlignmentCapsule hit : geneHits.getValue()) {
            String tcNumber = hit.getTcdbID();
            String accession = hit.getTarget();

            if (!complexesTCDB.containsKey(tcNumber)) {
                continue;
            }
            Set<String> complexMembers = complexesTCDB.get(tcNumber);
            if (complexMembers.size() <= 1 || !complexMembers.contains(accession)) {
                continue;
            }

            Map<String, Map<String, Double>> bySubunit = scoresByTc.get(tcNumber);
            if (bySubunit == null) {
                bySubunit = new HashMap<>();
                scoresByTc.put(tcNumber, bySubunit);
            }

            Map<String, Double> byGene = bySubunit.get(accession);
            if (byGene == null) {
                byGene = new HashMap<>();
                bySubunit.put(accession, byGene);
            }

            byGene.put(gene, hit.getBitScore());
        }
    }

    Map<String, Map<String, String>> gpr = new HashMap<>();

    // Iterate over a snapshot of the keys, as the original did.
    for (String tcNumber : new HashSet<>(scoresByTc.keySet())) {
        Map<String, Map<String, Double>> bySubunit = scoresByTc.get(tcNumber);

        // Only complexes with a hit for every subunit are resolved.
        if (bySubunit.size() != complexesTCDB.get(tcNumber).size()) {
            continue;
        }

        Map<String, String> assigned = new HashMap<>();
        Map<String, Set<String>> subunitsByGene = new HashMap<>();

        System.out.println(tcNumber);

        for (Map.Entry<String, Map<String, Double>> subunit : bySubunit.entrySet()) {
            System.out.println(bySubunit);

            for (String queryGene : subunit.getValue().keySet()) {
                Set<String> hitSubunits = subunitsByGene.get(queryGene);
                if (hitSubunits == null) {
                    hitSubunits = new HashSet<>();
                    subunitsByGene.put(queryGene, hitSubunits);
                }
                hitSubunits.add(subunit.getKey());
            }
        }

        System.out.println(subunitsByGene);

        if (!scoresByTc.isEmpty()) {
            assigned = findBestSubunits(bySubunit, assigned, subunitsByGene);
        }

        gpr.put(tcNumber, assigned);

        System.out.println(tcNumber + "\t" + gpr.get(tcNumber));

        // Seven blank lines separate the trace of consecutive complexes.
        for (int blank = 0; blank < 7; blank++) {
            System.out.println();
        }
    }

    System.out.println("tamanho dos resultados: " + gpr.size());

    for (Map.Entry<String, Map<String, String>> resolved : gpr.entrySet()) {
        System.out.println(resolved.getKey() + "\t" + resolved.getValue());
    }

    System.out.println("terminou");

    return gpr;
}
/**
 * Keeps, for each query gene, only the alignments whose TCDB identifier contains
 * the family annotated in that gene's container.
 *
 * @param blastResults    query gene -> alignments
 * @param genesContainers query gene -> gene container; an entry is required for every
 *                        gene that has at least one alignment
 * @return query gene -> retained alignments, in their original order; genes without
 *         any retained alignment are absent
 */
private static Map<String, List<AlignmentCapsule>> filterResults(Map<String, List<AlignmentCapsule>> blastResults,
        Map<String, GeneContainer> genesContainers) {

    Map<String, List<AlignmentCapsule>> retained = new HashMap<>();

    for (Map.Entry<String, List<AlignmentCapsule>> geneHits : blastResults.entrySet()) {
        String queryGene = geneHits.getKey();

        for (AlignmentCapsule hit : geneHits.getValue()) {
            if (!hit.getTcdbID().contains(genesContainers.get(queryGene).getAnnotatedFamily())) {
                continue;
            }

            List<AlignmentCapsule> kept = retained.get(queryGene);
            if (kept == null) {
                kept = new ArrayList<>();
                retained.put(queryGene, kept);
            }
            kept.add(hit);
        }
    }

    return retained;
}
/**
 * Recursively assigns one query gene to each subunit accession of a complex.
 * <p>
 * Each step asks {@code findBestGene} for the next (accession, gene) pair, records it in
 * {@code assigned} and recurses until {@code assigned} holds as many entries as {@code data}
 * has accessions. Progress is traced on standard output.
 * <p>
 * {@code assigned} is keyed by gene, so a gene can serve only one accession. If
 * {@code findBestGene} proposes a gene that is already assigned, recording it would overwrite
 * the earlier entry without growing the map, and the recursion could then run until the stack
 * overflows. That situation is reported as "no assignment" instead.
 *
 * @param data            subunit accession -> (query gene -> score); handed to {@code findBestGene},
 *                        which removes entries from the inner maps
 * @param assigned        assignments made so far, query gene -> subunit accession; modified in place
 * @param invertedMapping query gene -> subunit accessions hit by that gene
 * @return {@code assigned} once it has one entry per accession of {@code data}, or {@code null}
 *         when no complete assignment can be produced
 */
private static Map<String, String> findBestSubunits(Map<String, Map<String, Double>> data, Map<String, String> assigned, Map<String, Set<String>> invertedMapping){

    System.out.println();

    if (assigned.size() == data.size()) {
        return assigned;
    }

    System.out.println("1");

    // NOTE(review): assigned is keyed by gene, yet its keys are removed from sets of accessions
    // here and containsValue(queryGene) is tested below; both look like leftovers from an earlier
    // accession -> gene layout. Kept unchanged because they affect only the trace output - confirm.
    for (String mappedGene : new HashSet<>(invertedMapping.keySet())) {
        invertedMapping.get(mappedGene).removeAll(assigned.keySet());
    }

    // Accessions that still need a gene.
    Set<String> allAccessionRemaining = new HashSet<>(data.keySet());
    allAccessionRemaining.removeAll(assigned.values());

    System.out.println("2");
    System.out.println(allAccessionRemaining);

    Pair<String, String> pair = findBestGene(data, allAccessionRemaining, invertedMapping, assigned);

    if (pair == null) {
        return null;
    }

    System.out.println(3);

    String queryGene = pair.getB();
    String accession = pair.getA();

    System.out.println(queryGene);
    System.out.println(assigned.keySet());

    // The proposed gene already serves another accession: put() would replace that entry, leave
    // assigned.size() unchanged and hand the displaced accession back to the remaining set, so the
    // recursion could never complete. Stop here with the same "no assignment" result used elsewhere.
    if (assigned.containsKey(queryGene)) {
        return null;
    }

    if (!assigned.containsValue(queryGene)) {

        System.out.println("3.1");

        int candidateSubunits = invertedMapping.get(queryGene).size();

        if (candidateSubunits == 0) {
            System.out.println("4");
            return null;
        }

        if (candidateSubunits == 1) {
            System.out.println("5");

            assigned.put(queryGene, accession);

            System.out.println(assigned);

            return findBestSubunits(data, assigned, invertedMapping);
        }

        System.out.println("6");
    }

    System.out.println("7");

    assigned.put(queryGene, accession);

    System.out.println(assigned);

    return findBestSubunits(data, assigned, invertedMapping);
}
/**
 * Chooses the next (subunit accession, query gene) pair to assign.
 * <p>
 * If any remaining accession has exactly one candidate gene, that pair is returned
 * immediately. Otherwise the highest-scoring (accession, gene) combination among the
 * remaining accessions is selected, ignoring genes rejected earlier in this call. A
 * selection is rejected, and the search repeated, when too few other genes would be
 * left for the other remaining accessions.
 * <p>
 * Side effect: the working copy of {@code data} is shallow, so removing the selected
 * gene from it also removes that gene from the inner maps of {@code data} itself, for
 * every accession, even when the selection is subsequently rejected.
 *
 * @param data                  subunit accession -> (query gene -> score); the inner maps are modified
 * @param allAccessionRemaining accessions to consider; each must be a key of {@code data}
 * @param invertedMapping       not read by this method
 * @param assigned              map whose keys are left out when counting the genes still available
 * @return a pair built as (accession, gene), or {@code null} when the remaining accessions
 *         offer no selectable gene
 */
private static Pair<String, String> findBestGene(Map<String, Map<String, Double>> data, Set<String> allAccessionRemaining, Map<String, Set<String>> invertedMapping, Map<String, String> assigned){
boolean found = false;
double val = 0.0;
String gene = "";
String accession = "";
// genes rejected during this call; never considered again by the search below
Set<String> exclude = new HashSet<>();
// Shortcut: an accession with a single candidate gene is returned straight away.
// The gene is NOT removed from data on this path.
for(String acc : allAccessionRemaining) {
if(data.get(acc).size() == 1)
return new Pair<String, String>(acc, data.get(acc).keySet().iterator().next());
}
while(!found) {
// Shallow copy: the inner maps are the same objects as in data, so the remove() further down
// changes data as well.
Map<String, Map<String, Double>> dataClone = new HashMap<>(data);
// Only scores above -1.0 can be selected.
val = -1.0;
gene = "";
// Find the highest score over all remaining accessions and their non-excluded genes.
for(String acc : allAccessionRemaining) {
for(String queryGene : data.get(acc).keySet()) {
if(!exclude.contains(queryGene)) {
double currentVal = data.get(acc).get(queryGene);
if(currentVal > val) {
gene = queryGene;
val = currentVal;
accession = acc;
found = true;
}
}
}
}
System.out.println(">>" + gene);
// Nothing was selected in this pass: no candidate is left.
if(gene.isEmpty() && accession.isEmpty()) {
System.out.println("returning null...");
return null;
}
// Drop the selected gene from every accession (not only the remaining ones) and collect the
// genes that are still present, leaving out those that are keys of assigned.
Set<String> allGenes = new HashSet<>();
for(String newAcc : new HashSet<>(dataClone.keySet())) {
dataClone.get(newAcc).remove(gene);
allGenes.addAll(dataClone.get(newAcc).keySet());
allGenes.removeAll(assigned.keySet());
}
System.out.println(allGenes.size() + "\t" + allGenes);
System.out.println(allAccessionRemaining.size() + "\t" + allAccessionRemaining);
if(allGenes.size() < allAccessionRemaining.size() - 1) { // -1 because of the selected accession itself
found = false;
exclude.add(gene); // redo the search for the highest similarity, excluding this gene from further searches
gene = "";
accession = "";
}
}
return new Pair<String, String>(accession, gene);
}
/**
* @param service
* @param container
* @param proteinComplexes
* @param reactionContainersByID
*/
public static void buildGeneRules(TriageGraphDatabaseService service, Container container, Map<String, Map<String, String>> proteinComplexes,
Map<String, ReactionContainer> reactionContainersByID) {
// Map<String, String> genes = FilesUtils.readMapFromFile("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\geneRules.txt");
Map<String, String> locus = FilesUtils.readMapFromFile("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\Acc_to_locus.txt");
List<String[]> allData = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\AllTRIAGEData_V13.xlsx");
List<String[]> excel = new ArrayList<>();
System.out.println("start");
for(String[] line : allData) {
// if(line[0].equals("iTRnad__5cpd00067i_cpd00004_cpd15560")) {
String react = line[0].trim();
String geneRule = "";
String[] newline = new String[2];
// ReactionCI reaction = container.getReaction(react);
// System.out.println(react);
Iterable<Relationship> relationships = service.findReactionNode(react.replace("CoA", "abc")).getRelationships();
Set<String> TCs = new HashSet<>();
for(Relationship rel : relationships) {
TCs.add(rel.getStartNode().getProperty(TriageGeneralProperties.TC_Number.toString()).toString());
}
List<Set<String>> allRules = new ArrayList<>();
for(String tcNumber : TCs) {
if(proteinComplexes.containsKey(tcNumber)) {