GitLab CI for Docker builds is enabled! You can use it by adding a .gitlab-ci.yml file to your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 4d8a6613 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

several updates, triage configuration files created

parent 8d7a44c1
897 Desulfococcus multivorans
392416 Lactobacillus crustorum
61015 Staphylococcus succinus
43657 Pseudoalteromonas luteoviolacea
490 Neisseria sicca
309800 Haloferax volcanii (strain ATCC 29605 / DSM 3757 / JCM 8879 / NBRC 14742 / NCIMB 2012 / VKM B-1768 / DS2)
1664069 Bacillus glycinifermentans
897 [Bacteria, Proteobacteria, Deltaproteobacteria, Desulfobacterales, Desulfobacteraceae, Desulfococcus]
392416 [Bacteria, Firmicutes, Bacilli, Lactobacillales, Lactobacillaceae, Lactobacillus]
61015 [Bacteria, Firmicutes, Bacilli, Bacillales, Staphylococcaceae, Staphylococcus]
43657 [Bacteria, Proteobacteria, Gammaproteobacteria, Alteromonadales, Pseudoalteromonadaceae, Pseudoalteromonas]
490 [Bacteria, Proteobacteria, Betaproteobacteria, Neisseriales, Neisseriaceae, Neisseria]
309800 [Archaea, Euryarchaeota, Halobacteria, Haloferacales, Haloferacaceae, Haloferax]
1664069 [Bacteria, Firmicutes, Bacilli, Bacillales, Bacillaceae, Bacillus]
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -3,6 +3,7 @@ package blast;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
......@@ -25,17 +26,16 @@ import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import connection.TcdbExplorer;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ncbi.CreateGenomeFile;
import pt.uminho.ceb.biosystems.merlin.local.alignments.core.RunSimilaritySearch;
import pt.uminho.ceb.biosystems.merlin.local.alignments.core.ModelMerge.BlastAlignment;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import tcdb.tcdbTransportTypesRetriever.TcdbExplorer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import utilities.triage_utilities.Properties;
/**
* @author Davide
......@@ -45,18 +45,33 @@ public class Blast implements Observer{
private static final Logger logger = LoggerFactory.getLogger(Blast.class);
ConcurrentLinkedQueue<AlignmentCapsule> results;
private ConcurrentLinkedQueue<AlignmentCapsule> results;
private String currentTempDirectory;
private Integer queryFileSize = 0;
private Properties properties;
private String queryFilePath;
public Blast() {
public Blast(String queryFilePath, Properties properties) {
try {
logger.info("Blast process initializing...");
this.properties = properties;
this.queryFilePath = queryFilePath;
currentTempDirectory = properties.getCurrentTempDirectory();
results = performBlast();
logger.info("Blast process finished!");
}
catch(FileNotFoundException e1) {
logger.error("The genome file does not exist in the given path!!");
}
catch (Exception e) {
e.printStackTrace();
}
......@@ -74,35 +89,42 @@ public class Blast implements Observer{
//BLAST
String filePath = "C:/Users/Davide/Documents/reactionsBuilderTriage/temp/";
String tcdbFastaFile = filePath.concat("tcdbSEQs.txt");
// String filePath = "C:/Users/Davide/Documents/reactionsBuilderTriage/temp/";
String tcdbFastaFile = currentTempDirectory.concat("tcdbSEQs.txt");
// String queryFastaFile = "C:\\Users\\Davide\\Downloads\\NC_all.txt";
// String queryFastaFile = "C:\\Users\\Davide\\Downloads\\GCF_001951175.1_ASM195117v1_protein.faa\\GCF_001951175.1_ASM195117v1_protein.faa";
// String queryFastaFile = "C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\KBase\\Genomes\\Bacillus glycinifermentans\\GCF_900093775.1_EVONIK_BGLY_protein.faa";
logger.info("Downloading FASTA file from TCDB at: "); //incluir endereço e excepçoes caso nao consiga aceder à net
Map<String, AbstractSequence<?>> tcdbGenes = getTcdbInMapFormat();
logger.debug("Saving TCDB fasta in local folder: "); //indicar o caminho
logger.debug("Saving TCDB fasta in local folder: {}", tcdbFastaFile); //indicar o caminho
CreateGenomeFile.buildFastaFile(tcdbFastaFile, tcdbGenes);
logger.debug("Reading given target genome FASTA at: "); //indicar caminho
logger.debug("Reading given target genome FASTA at: {}", queryFilePath); //indicar caminho
System.out.println("temp" + "\t" + currentTempDirectory);
ConcurrentHashMap<String, AbstractSequence<?>> sequences= new ConcurrentHashMap<String, AbstractSequence<?>>();
sequences.putAll(FastaReaderHelper.readFastaProteinSequence(new File("C:/Users/Davide/Downloads/nvulgaris_29421_protein.faa")));
sequences.putAll(FastaReaderHelper.readFastaProteinSequence(new File(queryFilePath)));
queryFileSize = sequences.size();
logger.info("Blast process initialized!");
RunSimilaritySearch run_similaritySearch = new RunSimilaritySearch(tcdbGenes, 0.3,
Method.SmithWaterman, sequences, new AtomicBoolean(false), new AtomicInteger(0), new AtomicInteger(0), AlignmentScoreType.ALIGNMENT);
run_similaritySearch.setTcdbFastaFilePath(tcdbFastaFile);
run_similaritySearch.setSubjectFastaFilePath(tcdbFastaFile);
run_similaritySearch.addObserver(this);
run_similaritySearch.setCurrentTempFolderDirectory(filePath);
run_similaritySearch.setWorkspaceTaxonomyFolderPath(currentTempDirectory);
ConcurrentLinkedQueue<AlignmentCapsule> results = null;
if(sequences.keySet().size()>0)
results = run_similaritySearch.runBlastSearch(true);
results = run_similaritySearch.runBlastSearch(true, properties.getBlastEvalueThreshold(), properties.getBitScore(), properties.getQueryCoverage());
return results;
}
......@@ -180,5 +202,12 @@ public class Blast implements Observer{
return results;
}
/**
 * Gives access to the query file size recorded by this instance.
 *
 * @return the current value of {@code queryFileSize}
 */
public Integer getQueryFileSize() {
	return this.queryFileSize;
}
}
......@@ -32,12 +32,12 @@ public class GenerateTransportReactions {
Map<String, Map<String, TcNumberContainer>> mainMap = new HashMap<>();
Synonyms dictionary = new Synonyms();
int noMetabolitesNoReactionsCounter = 0;
// Map<String, TcdbMetabolitesContainer> tcdbMetabolites = new HashMap<>();
//
// tcdbMetabolites.put("O51918", tcdbMetabolitessss.get("O51918"));
// Map<String, TcdbMetabolitesContainer> tcdbMetabolites = new HashMap<>();
// //
// tcdbMetabolites.put("O07923", tcdbMetabolitessss.get("O07923"));
for(String accession : tcdbMetabolites.keySet()) {
......@@ -57,7 +57,7 @@ public class GenerateTransportReactions {
try {
TcNumberContainer tcNumberContainer = data.get(familyTC);
if(tcNumberContainer.getAllReactionsIds().size() == 0) {
newTcContainer = correctReaction(tcNumber, "NONE", "C0" , TypeOfTransporter.Uniport, null , tcdbMetContainer.getMetabolites(tcNumber), newTcContainer);
......@@ -66,12 +66,12 @@ public class GenerateTransportReactions {
for(int id : tcNumberContainer.getAllReactionsIds()) {
ReactionContainer reactionContainer = tcNumberContainer.getReactionContainer(id);
TypeOfTransporter evidence = checkForEvidenceOfTransport(tcdbMetContainer.getDescription(tcNumber), tcNumber);
if(evidence == null && proteinFamilyDescription.containsKey(tcNumber.replaceAll("(\\.\\d+)$", "")))
evidence = checkForEvidenceOfTransport(proteinFamilyDescription.get(tcNumber.replaceAll("(\\.\\d+)$", "")), tcNumber);
evidence = checkForEvidenceOfTransport(proteinFamilyDescription.get(tcNumber.replaceAll("(\\.\\d+)$", "")), tcNumber);
// System.out.println(accession);
Boolean antiportOrSymport = selectMethodOfMetabolitesDistribution(reactionContainer.getReaction(), reactionContainer.getTransportType(), dictionary);
......@@ -90,11 +90,16 @@ public class GenerateTransportReactions {
// System.out.println(metabolites);
// Set<String> metabolites = tcdbMetContainer.getMetabolites(tcNumber);
// System.out.println(reactionContainer.getTransportType());
if(evidence == null) {
if(reactionContainer.getTransportType().equals(TypeOfTransporter.Uniport) || reactionContainer.getTransportType().equals(TypeOfTransporter.Symport))
newTcContainer = processUniportAndSymportReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession);
else if(!reactionContainer.getTransportType().equals(TypeOfTransporter.Antiport))
newTcContainer = processBiochemicalReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession);
else
newTcContainer = processAntiportReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession);
}
......@@ -123,23 +128,23 @@ public class GenerateTransportReactions {
}
}
}
if(newTcContainer.getAllReactionsIds().size() == 0 && tcNumberContainer.getAllReactionsIds().size() > 0) {
newTcContainer = correctReaction(tcNumber, "NONE", "C-2" , TypeOfTransporter.Uniport, null , tcdbMetContainer.getMetabolites(tcNumber), newTcContainer);
// for(int id : tcNumberContainer.getAllReactionsIds()) {
//
// System.out.println(accession + "\t" + tcNumber + "\t" + tcdbMetContainer.getMetabolites(tcNumber));
// System.out.println(tcNumberContainer.getReactionContainer(id).getReaction());
// System.out.println();
//
// }
// for(int id : tcNumberContainer.getAllReactionsIds()) {
//
// System.out.println(accession + "\t" + tcNumber + "\t" + tcdbMetContainer.getMetabolites(tcNumber));
// System.out.println(tcNumberContainer.getReactionContainer(id).getReaction());
// System.out.println();
//
// }
}
if(newTcContainer.getAllReactionsIds().size() == 0) //reactions with generic reactions such as
noMetabolitesNoReactionsCounter++; //solute1 + solute2 <-> solute1 + solute2 are also included here because no metabolites where found for them
newTcContainer = filterUnwantedReactions(tcNumber, newTcContainer);
newContainers.put(tcNumber, newTcContainer);
......@@ -155,28 +160,159 @@ public class GenerateTransportReactions {
mainMap.put(accession, newContainers);
}
System.out.println("[INFO] Transporters with no metabolites and no reactions associated: " + noMetabolitesNoReactionsCounter);
return mainMap;
}
/**
 * Expands a generic reaction into concrete reactions, one per reactant/product pairing and
 * per metabolite, and adds each of them to {@code newTcContainer}.
 *
 * <p>The metabolite names of the original reaction are first normalised through
 * {@code correctMetabolitesNames}. Every reactant alternative is then paired with a product
 * alternative by {@code associateReactantsToProducts}, and for every supplied metabolite the
 * generic placeholder in both sides is substituted via {@code replaceGenericMetabolite}.
 * Each generated reaction inherits the transport type, properties and reversibility of the
 * source reaction and is tagged with confidence level {@code "B-0"}.
 *
 * <p>No reaction is added when {@code metabolites} is empty or when no reactant/product
 * pairing is found.
 *
 * @param reactionContainer the reaction used as template
 * @param newTcContainer the container that receives the generated reactions
 * @param metabolites the metabolites to substitute into the template
 * @param dictionary synonyms dictionary (not used by this method; kept for signature parity
 *        with the sibling {@code process*Reactions} methods)
 * @param tc identifier forwarded to {@code getAllPossibleReactantsorProducts}
 * @return {@code newTcContainer}, after the generated reactions were added
 */
private static TcNumberContainer processBiochemicalReactions(ReactionContainer reactionContainer,
		TcNumberContainer newTcContainer, Set<String> metabolites, Synonyms dictionary, String tc) {

	String[] corrected = correctMetabolitesNames(reactionContainer.getReaction(),
			reactionContainer.getReactant(), reactionContainer.getProduct());

	String originalReaction = corrected[0];

	List<String> reactants = getAllPossibleReactantsorProducts(corrected[1], tc);
	List<String> products = getAllPossibleReactantsorProducts(corrected[2], tc);

	// maps the index of each reactant alternative to the index of its matching product alternative
	Map<Integer, Integer> positions = associateReactantsToProducts(reactants, products);

	for(Map.Entry<Integer, Integer> pairing : positions.entrySet()) {

		for(String metabolite : metabolites) {

			String newReactant = replaceGenericMetabolite(reactants.get(pairing.getKey()), metabolite);
			String newProduct = replaceGenericMetabolite(products.get(pairing.getValue()), metabolite);

			ReactionContainer newReactContainer = new ReactionContainer(newReactant, newProduct, reactionContainer.isReversible());

			newReactContainer.setTransportType(reactionContainer.getTransportType());
			newReactContainer.setProperties(reactionContainer.getProperties());
			newReactContainer.setOriginalReaction(originalReaction);
			newReactContainer.setConfidenceLevel("B-0");

			newTcContainer.addReaction(newReactContainer);
		}
	}

	return newTcContainer;
}
/**
 * Substitutes a generic metabolite placeholder found in {@code text} by a concrete metabolite.
 *
 * <p>The entries of {@code GENERAL_METABOLITES} are checked in iteration order; only the first
 * entry contained in {@code text} is replaced (all of its occurrences), and the search stops there.
 *
 * @param text the reactant or product expression to process
 * @param metabolite the metabolite name to insert
 * @return the text with the first matching placeholder replaced, or {@code text} unchanged
 *         when it contains none of the placeholders
 */
private static String replaceGenericMetabolite(String text, String metabolite) {

	for(String placeholder : GENERAL_METABOLITES) {

		if(!text.contains(placeholder))
			continue;

		return text.replace(placeholder, metabolite);
	}

	return text;
}
/**
 * Keeps only the uniport reactions of a container that mixes several transport types.
 *
 * <p>When the container is associated with a single transport type (or none) it is returned
 * untouched. Otherwise every reaction whose transport type is not
 * {@link TypeOfTransporter#Uniport} is removed.
 *
 * @param tcNumber the TC number the container belongs to (currently not used by the filter)
 * @param newTcContainer the container to filter; it is modified in place
 * @return the same {@code newTcContainer} instance, after filtering
 */
private static TcNumberContainer filterUnwantedReactions(String tcNumber, TcNumberContainer newTcContainer) {

	Set<TypeOfTransporter> transportTypes = newTcContainer.getAllTransportTypesAssociated();

	if(transportTypes.size() > 1) {

		// Iterate over a snapshot of the ids: removeReaction(id) may modify the collection backing
		// getAllReactionsIds(), which would raise a ConcurrentModificationException mid-iteration.
		// NOTE(review): assumes getAllReactionsIds() returns a Collection<Integer> - confirm.
		Set<Integer> reactionIds = new HashSet<>(newTcContainer.getAllReactionsIds());

		for(int id : reactionIds) {

			ReactionContainer container = newTcContainer.getReactionContainer(id);

			if(!container.getTransportType().equals(TypeOfTransporter.Uniport))
				newTcContainer.removeReaction(id);
		}
	}

	return newTcContainer;
}
......@@ -191,16 +327,19 @@ public class GenerateTransportReactions {
private static TcNumberContainer correctReaction(String tc, String originalReaction, String confLevel, TypeOfTransporter evidence, Map<String, String> properties, Set<String> metabolites, TcNumberContainer newTcContainer) {
Set<String> reactions = new HashSet<>();
// if(confLevel.equals("C-17asghghsa" + metabolites + "\t" + tc);
if(tc.equals("2.A.60.1.5"))
System.out.println(evidence);
// if(confLevel.equals("C-17asghghsa" + metabolites + "\t" + tc);
metabolites.remove(TcdbMetabolitesContainer.PROTON);
metabolites.remove(TcdbMetabolitesContainer.EMPTY);
metabolites.remove(TcdbMetabolitesContainer.UNKNOWN);
// if(confLevel.equals("C-1"))
// System.out.println(metabolites + "\t" + tc);
// if(confLevel.equals("C-1"))
// System.out.println(metabolites + "\t" + tc);
for(String metabolite : metabolites)
reactions.add(generateReactionFromMetabolites(originalReaction, evidence, metabolite));
......@@ -239,7 +378,7 @@ public class GenerateTransportReactions {
return metabolite.concat(" (in) + ").concat(TcdbMetabolitesContainer.PROTON).concat(" (out) ").concat(ReactionContainer.REVERSIBLE_TOKEN)
.concat(metabolite).concat(" (out) + ").concat(TcdbMetabolitesContainer.PROTON).concat(" (in)");
}
return originalReaction;
}
......@@ -281,11 +420,14 @@ public class GenerateTransportReactions {
* @return
*/
private static TypeOfTransporter checkForEvidenceOfTransport(String description, String tcNumber) {
try {
if(tcNumber.equals("2.A.1.4.8"))
if(tcNumber.equals("2.A.1.4.8")) //create exception
return TypeOfTransporter.Uniport;
if(tcNumber.equals("2.A.60.1.5"))
return TypeOfTransporter.Antiport;
if(description.matches("(?i).*uniporte*r*[\\s*\\.*]*.*"))
return TypeOfTransporter.Uniport;
......@@ -298,7 +440,7 @@ public class GenerateTransportReactions {
catch (Exception e) {
System.out.println(tcNumber);
System.out.println(description);
e.printStackTrace();
}
......@@ -900,7 +1042,6 @@ public class GenerateTransportReactions {
// System.out.println(reactant + " <<<<>>>> " + product + "\t\t" + metabolites);
//
boolean accept = false;
boolean twoMetabolites = false;
Object[] regex = dictionary.correctSugars(reactant, product, metabolites, tc);
......@@ -941,8 +1082,6 @@ public class GenerateTransportReactions {
if(reactantMetabolites.containsKey(evidenceOfSymOrAnti) || productMetabolites.containsKey(evidenceOfSymOrAnti)) {
twoMetabolites = true;
// if(tc.equals("2.A.15.2.1 P31553"))
// System.out.println(metabolites);
......@@ -1469,7 +1608,7 @@ public class GenerateTransportReactions {
if(accept)
confidenceLevel = count;
}
count++;
//for unknown or empty, the default equation is accepted
......@@ -1495,10 +1634,20 @@ public class GenerateTransportReactions {
count++;
// if(!accept) {
//
// for(String metabolite : metabolites) {
//
// for(String react : reactantMetabolites.keySet()) {
//
// if(dictionary.isChildOf(metabolite, react))
// accept = true;
// }
// }
// }
count++;
if(accept) {
String[] result = new String[3];
......@@ -1593,7 +1742,7 @@ public class GenerateTransportReactions {
List<String> reactants = getAllPossibleReactantsorProducts(reactant, tc);
List<String> products = getAllPossibleReactantsorProducts(product, tc);
Map<Integer, Integer> positions = associateReactantsToProducts(reactants, products);
// if(tc.equals("3.D.1.1.1 P33602")) {
......
......@@ -15,8 +15,12 @@ import java.util.concurrent.TimeUnit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import connection.LinkConnection;
import connection.TcdbExplorer;
import connection.TcdbRetriever;
import enumerators.TypeOfTransporter;
import files.FilesUtils;
import javafx.util.Pair;
......@@ -48,6 +52,8 @@ public class FindTransporters {
public static final String PATH = "C:\\Users\\Davide\\Documents\\TCFilesBackup\\";
private static final Logger logger = LoggerFactory.getLogger(FindTransporters.class);
/**
* Method to retrieve all information about all tcNumber (*.*.* format) from TCDB.
*
......@@ -117,18 +123,20 @@ public class FindTransporters {
if(progress > lastProgress){
lastProgress = progress;
System.out.println(progress + " % search complete" );
String message = progress + " % search complete";
logger.debug(message);
}
if(attempt == LIMIT && !found){
System.out.println("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
logger.warn("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
failed.add(tc);
}
}
System.out.println("FAILED: " + failed.size() + "\t" + failed);
String message = "FAILED QUERIES: " + failed.size() + "\t" + failed;
logger.warn(message);
return data;
}
......@@ -269,11 +277,11 @@ public class FindTransporters {
}
catch (Exception e) {
System.out.println(originalTC);
System.out.println(text);
System.out.println(html);
logger.trace(originalTC);
logger.trace(text);
logger.trace(html);
e.printStackTrace();
logger.trace("StrackTrace: {}", e);
}
}
......@@ -965,10 +973,12 @@ public class FindTransporters {
return container;