GitLab CI for Docker build enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 8cc73050 authored by Davide Lagoa
Browse files

gpr bug fixed

parent ad515ae2
......@@ -4,7 +4,7 @@
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>service</artifactId>
<name>service</name>
<version>0.0.10-SNAPSHOT</version>
<version>0.0.11-SNAPSHOT</version>
<build>
<plugins>
<plugin>
......
......@@ -4,7 +4,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>service</artifactId>
<version>0.0.10-SNAPSHOT</version>
<version>0.0.11-SNAPSHOT</version>
<properties>
......@@ -111,7 +111,7 @@
<dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>utilities</artifactId>
<version>0.0.10-SNAPSHOT</version>
<version>0.0.11-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
......@@ -141,7 +141,7 @@
<dependency>
<groupId>pt.uminho.ceb.biosystems.transyt</groupId>
<artifactId>scraper</artifactId>
<version>0.0.10-SNAPSHOT</version>
<version>0.0.11-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>uk.ac.ebi.uniprot</groupId>
......
......@@ -65,8 +65,6 @@ public class Blast implements Observer{
this.workFolderID = workFolderID;
// blastDirectory = FilesUtils.getBlastDirectory();
results = performBlast();
logger.info("Blast process finished!");
......@@ -79,8 +77,6 @@ public class Blast implements Observer{
catch (Exception e) {
e.printStackTrace();
}
}
/**
......@@ -91,33 +87,12 @@ public class Blast implements Observer{
*/
private ConcurrentLinkedQueue<AlignmentCapsule> performBlast() throws Exception {
//BLAST
// String filePath = "C:/Users/Davide/Documents/reactionsBuilderTriage/temp/";
String tcdbFastaFile = ReadFastaTcdb.getPathFastaLastKnownVersion(); //uncomment
// String tcdbFastaFile = FilesUtils.getBackupTCDBFASTADirectory().concat("tcdbFasta.faa"); // DELETE ME
// String queryFastaFile = "C:\\Users\\Davide\\Downloads\\NC_all.txt";
// String queryFastaFile = "C:\\Users\\Davide\\Downloads\\GCF_001951175.1_ASM195117v1_protein.faa\\GCF_001951175.1_ASM195117v1_protein.faa";
// String queryFastaFile = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Genomes\\Bacillus glycinifermentans\\GCF_900093775.1_EVONIK_BGLY_protein.faa";
String tcdbFastaFile = ReadFastaTcdb.getPathFastaForAlignmentLastKnownVersion();
// logger.info("Downloading FASTA file from TCDB at: "); //incluir endereço e excepçoes caso nao consiga aceder à net
Map<String, AbstractSequence<?>> tcdbGenes = getTcdbInMapFormat();
logger.debug("Reading TCDB fasta in local folder: {}", tcdbFastaFile); //indicar o caminho
CreateGenomeFile.buildFastaFile(tcdbFastaFile, tcdbGenes);
logger.debug("Reading TCDB fasta in local folder: {}", tcdbFastaFile);
logger.debug("Reading given target genome FASTA at: {}", queryFilePath); //indicar caminho
logger.debug("Reading given target genome FASTA at: {}", queryFilePath);
// System.out.println("temp" + "\t" + blastDirectory);
//
// System.out.println(queryFilePath);
ConcurrentHashMap<String, AbstractSequence<?>> sequences= new ConcurrentHashMap<String, AbstractSequence<?>>();
sequences.putAll(FastaReaderHelper.readFastaProteinSequence(new File(queryFilePath)));
......@@ -125,7 +100,7 @@ public class Blast implements Observer{
logger.info("Blast process initialized!");
RunSimilaritySearchTransyt run_similaritySearch = new RunSimilaritySearchTransyt(forceBlast, tcdbGenes, properties.getSimilarityThreshold(),
RunSimilaritySearchTransyt run_similaritySearch = new RunSimilaritySearchTransyt(forceBlast, properties.getSimilarityThreshold(),
Method.SmithWaterman, sequences, new AtomicBoolean(false), new AtomicInteger(0), AlignmentScoreType.ALIGNMENT);
run_similaritySearch.setSubjectFastaFilePath(tcdbFastaFile);
......@@ -139,43 +114,6 @@ public class Blast implements Observer{
return results;
}
/**
 * Loads the TCDB FASTA supplied by {@code ReadFastaTcdb.getTcdbFasta(true)} and
 * parses it into protein sequences.
 * <p>
 * Before parsing, the raw text is cleaned: {@code <p>} and {@code </p>} markup
 * is stripped and one specific malformed placeholder record is removed, so the
 * FASTA parser only receives well-formed entries.
 *
 * @return a new mutable map holding every entry returned by the FASTA reader's
 *         {@code process()} call, under the keys that call produced
 * @throws Exception if reading the FASTA text or parsing the sequences fails
 */
public static Map<String, AbstractSequence<?>> getTcdbInMapFormat() throws Exception {

	StringBuilder sb = new StringBuilder();

	// try-with-resources: the reader was previously never closed, leaking the
	// underlying stream on every call (and on every read failure).
	try (BufferedReader br = ReadFastaTcdb.getTcdbFasta(true)) {

		String line;

		while ((line = br.readLine()) != null)
			sb.append(line).append("\n");
	}

	// Remove HTML paragraph tags and the known dummy record embedded in the text.
	String theString = sb.toString().replace("</p>", "").replace("<p>", "").replace(">gnl|TC-DB|xxxxxx 3.A.1.205.14 \ndsfgdfg", "");

	// Charset constant instead of the "utf-8" name lookup; same encoding.
	byte[] bytes = theString.getBytes(java.nio.charset.StandardCharsets.UTF_8);

	InputStream tcdbInputStream = new ByteArrayInputStream(bytes);

	FastaReader<ProteinSequence,AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence,AminoAcidCompound>(
			tcdbInputStream,
			new GenericFastaHeaderParser<ProteinSequence,AminoAcidCompound>(),
			new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

	// Copy into a map typed on the AbstractSequence supertype expected by callers.
	Map<String, AbstractSequence<?>> tcdb = new HashMap<>();
	tcdb.putAll(fastaReader.process());

	return tcdb;
}
@Override
public void update(Observable arg0, Object arg1) {
// TODO Auto-generated method stub
......
......@@ -66,7 +66,7 @@ public class RunSimilaritySearchTransyt extends Observable implements Observer {
* @param alignmentScoreType
* @throws Exception
*/
public RunSimilaritySearchTransyt(boolean forceBlast, Map<String, AbstractSequence<?>> staticGenesSet, double similarity_threshold, Method method, ConcurrentHashMap<String, AbstractSequence<?>> querySequences,
public RunSimilaritySearchTransyt(boolean forceBlast, double similarity_threshold, Method method, ConcurrentHashMap<String, AbstractSequence<?>> querySequences,
AtomicBoolean cancel, AtomicInteger querySize, AlignmentScoreType alignmentScoreType) throws Exception {
this.forceBlast = forceBlast;
......
......@@ -95,7 +95,7 @@ public class WriteByMetabolitesID {
//
// n = reader.nextInt();
// }
//
// }
dataTx.failure();
dataTx.close();
......@@ -232,7 +232,7 @@ public class WriteByMetabolitesID {
try {
// Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>();
// String[] accessions = new String[] {"P02931"};
// String[] accessions = new String[] {"P75757", "O85143", "O07084", "P13512", "A0QQH3"};
//
// for(String acc : accessions)
// reactionsData2.put(acc, reactionsData.get(acc));
......@@ -241,7 +241,7 @@ public class WriteByMetabolitesID {
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(reactionsData, service, data, namesAndIDsContainer, properties).getResults(); //uncomment
// for(TcNumberContainer2 container : newData.get("P02931")) {
// for(TcNumberContainer2 container : newData.get("P75757")) {
//
// System.out.println(container.getTcNumber());
//
......@@ -263,8 +263,8 @@ public class WriteByMetabolitesID {
//
// System.out.println();
// }
////
// System.out.println(data.containsKey("D07664"));
// test2(service);
......
......@@ -249,14 +249,15 @@ public class ProvideTransportReactionsToGenes {
Map<String, Set<String>> subunitsInDatabase = service.findAllSubunitsInDatabase();
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(subunitsInDatabase, blastResults, genesContainers);
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(subunitsInDatabase,
blastResults, genesContainers, properties);
// for(String key : finalResults.keySet())
// for(String newkey : finalResults.get(key).keySet())
// System.out.println(key + "\t" + newkey + "\t" + finalResults.get(key).get(newkey));
Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults);
Map<String, String> geneRules = GPRAssociations.buildGeneRules(service, proteinComplexes, finalResults, subunitsInDatabase);
// System.out.println("here");
......@@ -550,6 +551,9 @@ public class ProvideTransportReactionsToGenes {
// FilesUtils.readMapFromFile(path.concat("Acc_to_locus.txt"));
for(String queryAccession : resultsByEvalue.keySet()) {
// if(queryAccession.equals("b4087"))
// System.out.println();
Set<String> reactionsAlreadyAssigned = new HashSet<>();
Set<String> accepted = new HashSet<>();
......
......@@ -11,6 +11,7 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.neo4j.graphdb.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -546,6 +547,9 @@ public class TransportReactionsBuilder {
}
}
if(StringUtils.countMatches(reaction, " + ") >= distribution.size())
go = false; //this means that the same metabolite is twice+ in the reaction
if(go && formulasReaction != null) {
// System.out.println(reaction);
......@@ -707,9 +711,12 @@ public class TransportReactionsBuilder {
}
else if(defaultLabel.equals(MetaboliteMajorLabel.LigandCompound))
newReactContainer.setReactionKEGG(auxReaction);
if(reactionBase.contains("cpd00205") && newReactContainer.getMetabolites().size() == 2)
System.out.println();
newReactContainer.generateTranSyTID(metabolitesMapById, this.allExistingReactions, tcNumber);
String metaId = newReactContainer.getMetaReactionID();
// if(!reactionsAlreadyGenerated.contains(metaId)) {
......
......@@ -5,6 +5,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.slf4j.Logger;
......@@ -15,12 +16,14 @@ import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCa
import pt.uminho.ceb.biosystems.transyt.service.internalDB.WriteByMetabolitesID;
import pt.uminho.ceb.biosystems.transyt.service.neo4jRest.RestNeo4jGraphDatabase;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.GeneContainer;
import pt.uminho.ceb.biosystems.transyt.utilities.transytUtilities.Properties;
public class GPRAssociations {
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
public static Map<String, Map<String, String>> getGPR(Map<String, Set<String>> complexesTCDB, Map<String, List<AlignmentCapsule>> blastResults, Map<String, GeneContainer> genesContainers) {
public static Map<String, Map<String, String>> getGPR(Map<String, Set<String>> complexesTCDB,
Map<String, List<AlignmentCapsule>> blastResults, Map<String, GeneContainer> genesContainers, Properties properties) {
Map<String, List<AlignmentCapsule>> filteredBlastResults = filterResults(blastResults, genesContainers);
......@@ -31,11 +34,12 @@ public class GPRAssociations {
Map<String, Map<String, Double>> subunits = new HashMap<>();
for(AlignmentCapsule container : filteredBlastResults.get(gene)) {
String tcNumber = container.getTcdbID();
String accession = container.getTarget();
if(complexesTCDB.containsKey(tcNumber) && complexesTCDB.get(tcNumber).size() > 1
if(container.getEvalue() <= properties.geteValueThreshold() &&
complexesTCDB.containsKey(tcNumber) && complexesTCDB.get(tcNumber).size() > 1
&& complexesTCDB.get(tcNumber).contains(accession)) {
if(!resultsByTCnumber.containsKey(tcNumber))
......@@ -84,24 +88,24 @@ public class GPRAssociations {
}
if(!resultsByTCnumber.isEmpty()) {
try {
assigned = findBestSubunits(resultsByTCnumber.get(tcNumber), assigned, invertedMapping);
// if(assigned.containsKey("b4087"))
// System.out.println();
}
catch (StackOverflowError e) {
logger.error("A StackOverflowError occurred while searching subunits for tcNumber {}", tcNumber);
}
}
GPR.put(tcNumber, assigned);
}
}
return GPR;
......@@ -152,13 +156,16 @@ public class GPRAssociations {
Set<String> allAccessionRemaining = new HashSet<>(data.keySet());
for(String queryGene : new HashSet<>(invertedMapping.keySet())) {
if(assigned.size() > 0) {
for(String queryGene : new HashSet<>(invertedMapping.keySet())) {
invertedMapping.get(queryGene).removeAll(assigned.keySet());
// allAccessionRemaining.addAll(invertedMapping.get(queryGene));
invertedMapping.get(queryGene).removeAll(assigned.keySet());
// allAccessionRemaining.addAll(invertedMapping.get(queryGene));
}
allAccessionRemaining.removeAll(assigned.values());
}
allAccessionRemaining.removeAll(assigned.values());
Pair<String, String> pair = findBestGene(data, allAccessionRemaining, invertedMapping, assigned);
......@@ -280,43 +287,43 @@ public class GPRAssociations {
* @throws Exception
*/
public static Map<String, String> buildGeneRules(RestNeo4jGraphDatabase service, Map<String, Map<String, String>> proteinComplexes,
Map<String, Map<String, Set<String>>> results) throws Exception {
Map<String, Map<String, Set<String>>> results, Map<String, Set<String>> complexesTCDB) throws Exception {
// Map<String, String> genes = FilesUtils.readMapFromFile("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\geneRules.txt");
// Map<String, String> locus = FilesUtils.readMapFromFile("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\Acc_to_locus.txt");
// Map<String, String> locus = FilesUtils.readMapFromFile("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\Acc_to_locus.txt");
// List<String[]> allData = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Reports\\ecoli_Validation30\\AllTRIAGEData_V13.xlsx");
// List<String[]> excel = new ArrayList<>();
Map<String, Set<String>> initalRules = new HashMap<>();
Map<String, Map<String, String>> initalRules = new HashMap<>();
Set<String> reactions = new HashSet<>();
for(String acc : results.keySet()) {
String gene = acc;
// if(locus.containsKey(acc)) {
//
// gene = locus.get(acc);
// }
// if(locus.containsKey(acc)) {
//
// gene = locus.get(acc);
// }
for(String tc : results.get(acc).keySet()) {
reactions.addAll(results.get(acc).get(tc));
for(String r : results.get(acc).get(tc)) {
// System.out.println("r" + r);
// System.out.println("r" + r);
if(initalRules.containsKey(r)) {
Set<String> set = initalRules.get(r);
set.add(gene);
Map<String, String> set = initalRules.get(r);
set.put(gene, tc);
initalRules.put(r, set);
}
else {
Set<String> set = new HashSet<>();
set.add(gene);
Map<String, String> set = new HashMap<>();
set.put(gene, tc);
initalRules.put(r, set);
}
......@@ -327,132 +334,135 @@ public class GPRAssociations {
Map<String, String> rules = new HashMap<>();
// Map<String, TransytNode> allReactionNodes = service.getAllReactionNodes();
// Map<String, TransytNode> allReactionNodes = service.getAllReactionNodes();
logger.trace("Searching all tc Numbers by reactions in the database...");
Map<String, Set<String>> tcNumbersByReaction = service.findAllTcNumbersByReaction();
logger.trace("Search complete!");
service.close();
for(String react : reactions) {
// System.out.println(react);
// String auxID = react.replace("iTR", "TR").replaceAll("TRUni", "TRUnp");
//
// if(auxID.contains("TRUnp") || auxID.contains("TRSym") || (auxID.contains("TRAnt") && !auxID.matches("TRAnt__.+_.+_.+"))) {
// auxID = auxID.replaceAll("i_", "_").replaceAll("i$", "").replaceAll("o_", "_").replaceAll("o$", "");
//// System.out.println("Replacing...");
// }
// auxID = auxID.replace("TRUnp", "TRUni");
// System.out.println(auxID);
// System.out.println(react);
String auxID = react.replace("iTR", "TR").replaceAll("TRUni", "TRUnp");
if(auxID.contains("TRUnp") || auxID.contains("TRSym") || (auxID.contains("TRAnt") && !auxID.matches("TRAnt__.+_.+_.+"))) {
auxID = auxID.replaceAll("i_", "_").replaceAll("i$", "").replaceAll("o_", "_").replaceAll("o$", "");
// System.out.println("Replacing...");
}
auxID = auxID.replace("TRUnp", "TRUni");
// System.out.println(auxID);
if(tcNumbersByReaction.containsKey(auxID)) {
// System.out.println("ENTROU!!!");
// TransytNode node = allReactionNodes.get(auxID);
if(tcNumbersByReaction.containsKey(react)) {
// if(line[0].equals("iTRnad__5cpd00067i_cpd00004_cpd15560")) {
// System.out.println("ENTROU!!!");
// String react = line[0].trim();
// TransytNode node = allReactionNodes.get(auxID);
String geneRule = "";
// if(line[0].equals("iTRnad__5cpd00067i_cpd00004_cpd15560")) {
// String[] newline = new String[2];
// String react = line[0].trim();
// ReactionCI reaction = container.getReaction(react);
String geneRule = "";
// System.out.println(react);
// String[] newline = new String[2];
// Iterable<Relationship> relationships = service.findReactionNode(react.replace("CoA", "abc")).getRelationships(TransytRelationshipType.has_reaction);
// ReactionCI reaction = container.getReaction(react);
// TransytNode node = service.findReactionNode();
Set<String> TCs = tcNumbersByReaction.get(auxID);
// System.out.println(react);
List<Set<String>> allRules = new ArrayList<>();
// Iterable<Relationship> relationships = service.findReactionNode(react.replace("CoA", "abc")).getRelationships(TransytRelationshipType.has_reaction);
for(String tcNumber : TCs) {
// TransytNode node = service.findReactionNode();
if(proteinComplexes.containsKey(tcNumber) && proteinComplexes.get(tcNumber) != null) {
Set<String> complex = proteinComplexes.get(tcNumber).keySet();
Set<String> TCs = tcNumbersByReaction.get(react);
Set<String> rule = new HashSet<>();
List<Set<String>> allRules = new ArrayList<>();
String subGeneRule = "(";
for(String tcNumber : TCs) {
for(String query : complex) {
if(proteinComplexes.containsKey(tcNumber) && proteinComplexes.get(tcNumber) != null) {
Set<String> complex = proteinComplexes.get(tcNumber).keySet();
Set<String> rule = new HashSet<>();
query = query.split("\\s+")[0];
String subGeneRule = "(";
// subGeneRule = subGeneRule.concat(query).concat(" and ");
// rule.add(query);
for(String query : complex) {
// if(locus.containsKey(query)) {
//
// subGeneRule = subGeneRule.concat(locus.get(query)).concat(" and ");
// rule.add(locus.get(query));
// }
// else {
subGeneRule = subGeneRule.concat(query).concat(" and ");
rule.add(query);
// }
}
query = query.split("\\s+")[0];
// System.out.println(">>> " + subGeneRule);
// System.out.println(allRules);
// System.out.println(rule);
// subGeneRule = subGeneRule.concat(query).concat(" and ");
// rule.add(query);
boolean save = true;
// if(locus.containsKey(query)) {
//
// subGeneRule = subGeneRule.concat(locus.get(query)).concat(" and ");
// rule.add(locus.get(query));
// }
// else {
subGeneRule = subGeneRule.concat(query).concat(" and ");
rule.add(query);
// }
}
for(Set<String> rule2 : new ArrayList<>(allRules)) {
// System.out.println(">>> " + subGeneRule);
// System.out.println(allRules);
// System.out.println(rule);
// System.out.println(rule2);
// System.out.println(rule);
// System.out.println();
boolean save = true;
if(rule2.containsAll(rule) || rule.containsAll(rule2)) {
// System.out.println("guardar");
allRules.add(rule);
save = false;
}
for(Set<String> rule2 : new ArrayList<>(allRules)) {
}
// System.out.println(rule2);
// System.out.println(rule);
// System.out.println();
if(rule2.containsAll(rule) || rule.containsAll(rule2)) {
// System.out.println("guardar");
allRules.add(rule);
save = false;
}
}
allRules.add(rule);