GitLab CI for Docker builds is enabled! You can enable it for your project by adding a .gitlab-ci.yml file. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit c693031a authored by Davide Lagoa
Browse files

blast operation created

parent c45bc230
......@@ -12,11 +12,7 @@
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
......
package betaBarrels;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
......
package betaBarrels;
import java.util.HashMap;
import java.util.Map;
public class main {
public static void main(String[] args) {
......
package blast;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Observable;
import java.util.Observer;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ncbi.CreateGenomeFile;
import pt.uminho.ceb.biosystems.merlin.local.alignments.core.RunSimilaritySearch;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import tcdb.tcdbTransportTypesRetriever.TcdbExplorer;
/**
 * Runs a similarity search of a local protein FASTA file against the sequences
 * published by TCDB.
 *
 * The TCDB sequences are downloaded, written to a FASTA file inside a temporary
 * folder, and handed to {@link RunSimilaritySearch} together with the query
 * sequences. All file locations are currently hard-coded.
 */
public class Blast implements Observer {

    /**
     * Creates the instance and immediately runs the search. Any failure is
     * printed to standard error and not propagated to the caller.
     */
    public Blast() {

        try {
            performBlast();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Downloads the TCDB sequences, stores them as a FASTA file in the temporary
     * folder and runs the search of the local genome against them.
     *
     * @throws Exception if downloading, parsing, writing or searching fails
     */
    private void performBlast() throws Exception {

        //BLAST
        System.out.println("INIT BLAST!!");

        String filePath = "C:/Users/Davide/Documents/reactionsBuilderTriage/temp/";
        String tcdbFastaFile = filePath.concat("tcdbSEQs.txt");

        Map<String, AbstractSequence<?>> tcdbGenes = getTcdbInMapFormat();

        System.out.println("BIOJAVA TCDB!!");

        CreateGenomeFile.buildFastaFile(tcdbFastaFile, tcdbGenes);

        System.out.println("BIOJAVA RENATO!!");

        ConcurrentHashMap<String, AbstractSequence<?>> sequences = new ConcurrentHashMap<>();
        sequences.putAll(FastaReaderHelper.readFastaProteinSequence(new File("C:/Users/Davide/Downloads/nvulgaris_29421_protein.faa")));

        System.out.println("BLAST!!");

        // NOTE(review): 0.1 is presumably a similarity threshold - confirm against RunSimilaritySearch.
        RunSimilaritySearch run_similaritySearch = new RunSimilaritySearch(tcdbGenes, 0.1,
                Method.SmithWaterman, sequences, new AtomicBoolean(false), new AtomicInteger(0), new AtomicInteger(0), AlignmentScoreType.ALIGNMENT);

        run_similaritySearch.setTcdbFastaFilePath(tcdbFastaFile);
        run_similaritySearch.addObserver(this);
        //////set temp directory here!!!
        run_similaritySearch.setCurrentTempFolderDirectory(filePath);

        // The search is skipped when the query file held no sequences. The
        // returned alignments are not consumed here, so they are not stored.
        if(!sequences.isEmpty())
            run_similaritySearch.runBlastSearch(true);

        System.out.println("BLAST PERFORMED!!");
    }

    /**
     * Downloads the TCDB FASTA resource at {@code TcdbExplorer.TCDB_FASTA_URL}
     * and parses it into protein sequences.
     *
     * Before parsing, {@code <p>} and {@code </p>} tags and one known placeholder
     * record are stripped from the downloaded text.
     *
     * @return a new mutable map holding every entry produced by the FASTA reader
     * @throws Exception if the resource cannot be read or parsed
     */
    public static Map<String, AbstractSequence<?>> getTcdbInMapFormat() throws Exception {

        StringBuilder sb = new StringBuilder();

        // try-with-resources closes the network stream even when reading fails.
        // Decoding and re-encoding both use UTF-8 so the bytes handed to the
        // parser do not depend on the platform default charset.
        // NOTE(review): assumes the TCDB dump is ASCII/UTF-8 - confirm.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new URL(TcdbExplorer.TCDB_FASTA_URL).openStream(), StandardCharsets.UTF_8))) {

            String line;

            while ((line = br.readLine()) != null)
                sb.append(line).append("\n");
        }

        String theString = sb.toString().replace("</p>", "").replace("<p>", "").replace(">gnl|TC-DB|xxxxxx 3.A.1.205.14 \ndsfgdfg", "");

        InputStream tcdbInputStream = new ByteArrayInputStream(theString.getBytes(StandardCharsets.UTF_8));

        FastaReader<ProteinSequence,AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence,AminoAcidCompound>(
                tcdbInputStream,
                new GenericFastaHeaderParser<ProteinSequence,AminoAcidCompound>(),
                new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

        Map<String, AbstractSequence<?>> tcdb = new HashMap<>();
        tcdb.putAll(fastaReader.process());

        return tcdb;
    }

    /**
     * Observer callback; notifications from the similarity search are currently ignored.
     */
    @Override
    public void update(Observable arg0, Object arg1) {
        // TODO Auto-generated method stub
    }
}
......@@ -348,6 +348,8 @@ public class Compare {
workbook.write(fileOut);
fileOut.flush();
fileOut.close();
workbook.close();
System.out.println("xlsx containing the comparison results created");
......
......@@ -26,13 +26,18 @@ public class GenerateTransportReactions {
* @param tcdbMetabolites
* @return
*/
public static Map<String, Map<String, TcNumberContainer>> generateReactions(Map<String, TcNumberContainer> data, Map<String, TcdbMetabolitesContainer> tcdbMetabolites) {
public static Map<String, Map<String, TcNumberContainer>> generateReactions(Map<String, TcNumberContainer> data,
Map<String, TcdbMetabolitesContainer> tcdbMetabolites, Map<String, String> proteinFamilyDescription) {
Map<String, Map<String, TcNumberContainer>> mainMap = new HashMap<>();
Synonyms dictionary = new Synonyms();
int noMetabolitesNoReactionsCounter = 0;
// Map<String, TcdbMetabolitesContainer> tcdbMetabolites = new HashMap<>();
//
// tcdbMetabolites.put("O51918", tcdbMetabolitessss.get("O51918"));
for(String accession : tcdbMetabolites.keySet()) {
......@@ -61,9 +66,12 @@ public class GenerateTransportReactions {
for(int id : tcNumberContainer.getAllReactionsIds()) {
ReactionContainer reactionContainer = tcNumberContainer.getReactionContainer(id);
TypeOfTransporter evidence = checkForEvidenceOfTransport(tcdbMetContainer.getDescription(tcNumber), tcNumber);
if(evidence == null && proteinFamilyDescription.containsKey(tcNumber.replaceAll("(\\.\\d+)$", "")))
evidence = checkForEvidenceOfTransport(proteinFamilyDescription.get(tcNumber.replaceAll("(\\.\\d+)$", "")), tcNumber);
// System.out.println(accession);
Boolean antiportOrSymport = selectMethodOfMetabolitesDistribution(reactionContainer.getReaction(), reactionContainer.getTransportType(), dictionary);
......@@ -83,8 +91,6 @@ public class GenerateTransportReactions {
// Set<String> metabolites = tcdbMetContainer.getMetabolites(tcNumber);
if(evidence == null) {
if(reactionContainer.getTransportType().equals(TypeOfTransporter.Uniport) || reactionContainer.getTransportType().equals(TypeOfTransporter.Symport))
......@@ -109,7 +115,7 @@ public class GenerateTransportReactions {
newTcContainer = processAntiportReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession);
// }
else
else if(!tcNumberContainer.evidenceTransportTypeExists(evidence))
newTcContainer = correctReaction(tcNumber, reactionContainer.getReaction(), "C-1" ,evidence, reactionContainer.getProperties(), metabolites, newTcContainer);
// System.out.println(tcNumber+" "+accession + " " + evidence + " " + reactionContainer.getTransportType() + " " + reactionContainer.getReaction() + "\t" + metabolites);
......@@ -133,7 +139,8 @@ public class GenerateTransportReactions {
if(newTcContainer.getAllReactionsIds().size() == 0) //reactions with generic reactions such as
noMetabolitesNoReactionsCounter++; //solute1 + solute2 <-> solute1 + solute2 are also included here because no metabolites where found for them
newTcContainer = filterUnwantedReactions(tcNumber, newTcContainer);
newContainers.put(tcNumber, newTcContainer);
}
......@@ -155,6 +162,24 @@ public class GenerateTransportReactions {
}
/**
 * Keeps only the uniport reactions of a container that is associated with more
 * than one transport type; containers with a single transport type (or none)
 * are returned untouched.
 *
 * @param tcNumber TC number the container belongs to (currently unused)
 * @param newTcContainer container to filter; it is modified in place
 * @return the same container instance that was passed in
 */
private static TcNumberContainer filterUnwantedReactions(String tcNumber, TcNumberContainer newTcContainer) {

    Set<TypeOfTransporter> set = newTcContainer.getAllTransportTypesAssociated();

    if(set.size() > 1) {

        // Iterate over a snapshot of the ids: removing reactions while walking the
        // container's own id collection can fail with a ConcurrentModificationException
        // if that collection is a live view of the container's storage.
        Integer[] ids = newTcContainer.getAllReactionsIds().toArray(new Integer[0]);

        for(int id : ids) {

            ReactionContainer container = newTcContainer.getReactionContainer(id);

            if(!container.getTransportType().equals(TypeOfTransporter.Uniport))
                newTcContainer.removeReaction(id);
        }
    }

    return newTcContainer;
}
/**
* Process metabolites associated to the wrong transport types.
*
......@@ -256,18 +281,26 @@ public class GenerateTransportReactions {
* @return
*/
private static TypeOfTransporter checkForEvidenceOfTransport(String description, String tcNumber) {

    // Hard-coded override: this TC number is always reported as a uniporter,
    // whatever its description says.
    if("2.A.1.4.8".equals(tcNumber))
        return TypeOfTransporter.Uniport;

    // A missing description carries no evidence. Checking explicitly replaces the
    // catch-all exception handler that was only there to absorb the NullPointerException.
    if(description == null)
        return null;

    // The first keyword found wins, in the order uniport, symport, antiport.
    // The match is case-insensitive and also accepts the "...porter" spelling.
    if(description.matches("(?i).*uniporte*r*[\\s*\\.*]*.*"))
        return TypeOfTransporter.Uniport;

    else if(description.matches("(?i).*symporte*r*[\\s*\\.*]*.*"))
        return TypeOfTransporter.Symport;

    else if(description.matches("(?i).*antiporte*r*[\\s*\\.*]*.*"))
        return TypeOfTransporter.Antiport;

    // No transport-type keyword found.
    return null;
}
......@@ -745,7 +778,7 @@ public class GenerateTransportReactions {
reactant = aux[1];
product = aux[2];
Set<String> reactions = getAllPossibleReactions(originalReaction, reactant, product, reactionContainer.getReversible(), tc);
Set<String> reactions = getAllPossibleReactions(originalReaction, reactant, product, reactionContainer.isReversible(), tc);
// if(tc.equals("2.A.20.1.2 P43676")) {
// System.out.println("Areactions " + reactions);
......@@ -757,7 +790,7 @@ public class GenerateTransportReactions {
// System.out.println(react);
String[] newReaction;
if(reactionContainer.getReversible())
if(reactionContainer.isReversible())
newReaction = react.split(ReactionContainer.REV_TOKEN);
else
newReaction = react.split(ReactionContainer.IRREV_TOKEN);
......@@ -785,7 +818,7 @@ public class GenerateTransportReactions {
// System.out.println();
// }
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.getReversible());
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.isReversible());
newReactContainer.setTransportType(reactionContainer.getTransportType());
newReactContainer.setProperties(reactionContainer.getProperties());
......@@ -1667,7 +1700,7 @@ public class GenerateTransportReactions {
// System.out.println();
// }
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.getReversible());
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.isReversible());
newReactContainer.setTransportType(reactionContainer.getTransportType());
newReactContainer.setProperties(reactionContainer.getProperties());
......
......@@ -39,15 +39,15 @@ public class FindTransportTypes {
* @throws IOException
* @throws Exception
*/
public static Map<String, TcNumberContainer> getAllTCNumbersInformation() throws IOException, Exception{
public static Map<String, TcNumberContainer> getAllTCNumbersInformation(Set<String> toSearch) throws IOException, Exception{
Map<String, TcNumberContainer> data = new HashMap<>();
// Set<String> toSearch = TcdbExplorer.getTcNumbers();
Set<String> toSearch = new HashSet<>();
toSearch.add("2.A.37");
// Set<String> toSearch = TcdbExplorer.getTcNumbers();
//
//// Set<String> toSearch = new HashSet<>();
//
//// toSearch.add("2.A.37");
List<String> searched = new ArrayList<>();
......
......@@ -30,7 +30,7 @@ import tcdb.capsules.TcNumberContainer;
public class FindTransporters {
public static final int LIMIT = 10;
private static final String URL = "http://www.tcdb.org/search/result.php?tc=";
public static final List<String> REVERSIBLES = List.of ("⇌", "⇌&nbsp;", "&harr;", "&#8652;", "⇋");
public static final List<String> IRREVERSIBLES = List.of ("&rarr;", "%u21CC", "%u2192", "--&gt;", "&rightarrow;", "&AElig;",
"&agrave;");
......@@ -90,7 +90,7 @@ public class FindTransporters {
while(attempt < LIMIT && !found) {
if(conn.getCodeConnection(URL.concat(tc)) == 200){
if(conn.getCodeConnection(TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc)) == 200){
FilesUtils.webPageSaver(conn.getPageOpenStream(), out);
......@@ -122,7 +122,7 @@ public class FindTransporters {
if(attempt == LIMIT && !found){
System.out.println("results not found for query: " + URL.concat(tc));
System.out.println("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
failed.add(tc);
}
......@@ -973,5 +973,5 @@ public class FindTransporters {
return null;
}
}
......@@ -50,10 +50,10 @@ public class ReadFastaTcdb {
String accession = "", tcNumber = "", organism = "", description = "";
Map<String, Integer> distributions1 = new TreeMap<>();
Map<String, Integer> distributions2 = new TreeMap<>();
Map<String, Integer> distributions3 = new TreeMap<>();
Map<String, Integer> distributions4 = new TreeMap<>();
// Map<String, Integer> distributions1 = new TreeMap<>();
// Map<String, Integer> distributions2 = new TreeMap<>();
// Map<String, Integer> distributions3 = new TreeMap<>();
// Map<String, Integer> distributions4 = new TreeMap<>();
Map<String, Integer> distributions5 = new TreeMap<>();
......@@ -115,53 +115,53 @@ public class ReadFastaTcdb {
// System.out.println("tc--" + tcNumber);
String[] newTcNumber = tcNumber.split("\\.");
// System.out.println(newTcNumber.length);
String newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]).concat(".").concat(newTcNumber[3]);
if(distributions4.containsKey(newTc)) {
int count = distributions4.get(newTc);
distributions4.put(newTc, count+1);
}
else
distributions4.put(newTc, 1);
newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]);
if(distributions3.containsKey(newTc)) {
int count = distributions3.get(newTc);
distributions3.put(newTc, count+1);
}
else
distributions3.put(newTc, 1);
newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]);
if(distributions2.containsKey(newTc)) {
int count = distributions2.get(newTc);
distributions2.put(newTc, count+1);
}
else
distributions2.put(newTc, 1);
newTc = newTcNumber[0];
if(distributions1.containsKey(newTc)) {
int count = distributions1.get(newTc);
distributions1.put(newTc, count+1);
}
else
distributions1.put(newTc, 1);
// String[] newTcNumber = tcNumber.split("\\.");
//
// // System.out.println(newTcNumber.length);
//
// String newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]).concat(".").concat(newTcNumber[3]);
//
// if(distributions4.containsKey(newTc)) {
//
// int count = distributions4.get(newTc);
// distributions4.put(newTc, count+1);
// }
//
// else
// distributions4.put(newTc, 1);
//
// newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]);
//
// if(distributions3.containsKey(newTc)) {
//
// int count = distributions3.get(newTc);
// distributions3.put(newTc, count+1);
// }
//
// else
// distributions3.put(newTc, 1);
//
// newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]);
//
// if(distributions2.containsKey(newTc)) {
//
// int count = distributions2.get(newTc);
// distributions2.put(newTc, count+1);
// }
//
// else
// distributions2.put(newTc, 1);
//
// newTc = newTcNumber[0];
//
// if(distributions1.containsKey(newTc)) {
//
// int count = distributions1.get(newTc);
// distributions1.put(newTc, count+1);
// }
//
// else
// distributions1.put(newTc, 1);
description = "";
......@@ -187,7 +187,7 @@ public class ReadFastaTcdb {
System.out.println("Number of different accessions: " + accessions.size());
return distributions3.keySet();
return distributions5.keySet();
}
catch (IOException e) {
e.printStackTrace();
......@@ -213,7 +213,7 @@ public class ReadFastaTcdb {
LinkConnection conn = new LinkConnection();
if(conn.getCodeConnection("http://www.tcdb.org/public/tcdb") == 200) {
if(conn.getCodeConnection(TcdbExplorer.TCDB_FASTA_URL) == 200) {
webPageSaver(conn.getPageOpenStream(), out);
......
......@@ -6,6 +6,7 @@ import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import files.FilesUtils;
import files.JSONFilesUtils;
import files.ReadExcelFile;
import tcdb.capsules.TcNumberContainer;
......@@ -26,16 +27,24 @@ public class Retriever {
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
// Set<String> toSearch = new HashSet<>();
//
// toSearch = TcdbExplorer.getTcNumbers();
System.out.println("Retrieving TCDB FASTA file...");
Set<String> tcNumbers = TcdbExplorer.getTcNumbers();
Set<String> toSearch = TcdbExplorer.generateTCsFamily(tcNumbers);
// toSearch.add("2.A.75");
////
// System.out.println("Retrieving data from TCDB...");
// Map<String, TcNumberContainer> data = FindTransporters.getAllTCNumbersInformation(toSearch);
// Map<String, String> proteinFamilyDescription = TcdbExplorer.getProteinsBelongingToFamilyDescription(tcNumbers);
//
// System.out.println("COMPLETE...");
// FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\proteinFamilyDescriptions.txt", proteinFamilyDescription);
Map<String, String> proteinFamilyDescription = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\proteinFamilyDescriptions.txt");
System.out.println("SEARCH COMPLETE...");
//
int n = 1;
......@@ -52,7 +61,7 @@ public class Retriever {
//
// ///////////////DESCOMENTAR
Map<String, TcNumberContainer> data = JSONFilesUtils.readJSONExceptionsFile(); //the reader that reads exceptios can also read normal data
......@@ -64,10 +73,15 @@ public class Retriever {
Map<String, TcdbMetabolitesContainer> tcdbMetabolites = ProcessTcdbMetabolitesExcel.processData(excel);
Map<String, Map<String, TcNumberContainer>> transportReactions = GenerateTransportReactions.generateReactions(data, tcdbMetabolites);
Map<String, Map<String, TcNumberContainer>> transportReactions = GenerateTransportReactions.generateReactions(data, tcdbMetabolites, proteinFamilyDescription);
JSONFilesUtils.writeJSONtcReactions(transportReactions);
///////////////DESCOMENTAR
// checkDescriptions(tcdbMetabolites);
......
package tcdb.tcdbTransportTypesRetriever;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;