Commit c693031a authored by Davide Lagoa's avatar Davide Lagoa
Browse files

blast operation created

parent c45bc230
...@@ -12,11 +12,7 @@ ...@@ -12,11 +12,7 @@
<attribute name="maven.pomderived" value="true"/> <attribute name="maven.pomderived" value="true"/>
</attributes> </attributes>
</classpathentry> </classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER"> <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes> <attributes>
<attribute name="maven.pomderived" value="true"/> <attribute name="maven.pomderived" value="true"/>
......
package betaBarrels; package betaBarrels;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By; import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement; import org.openqa.selenium.WebElement;
......
package betaBarrels; package betaBarrels;
import java.util.HashMap;
import java.util.Map;
public class main { public class main {
public static void main(String[] args) { public static void main(String[] args) {
......
package blast;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Observable;
import java.util.Observer;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.io.FastaReader;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.sequence.io.GenericFastaHeaderParser;
import org.biojava.nbio.core.sequence.io.ProteinSequenceCreator;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ncbi.CreateGenomeFile;
import pt.uminho.ceb.biosystems.merlin.local.alignments.core.RunSimilaritySearch;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import tcdb.tcdbTransportTypesRetriever.TcdbExplorer;
/**
 * Runs a similarity search of a local protein FASTA file against the TCDB
 * sequences downloaded from {@link TcdbExplorer#TCDB_FASTA_URL}.
 * <p>
 * The whole pipeline is triggered from the constructor. The instance registers
 * itself as an observer of the search, but ignores every notification.
 */
public class Blast implements Observer {

	/** Folder that receives the TCDB FASTA dump and is handed to the search as its temp directory. */
	private static final String TEMP_DIRECTORY = "C:/Users/Davide/Documents/reactionsBuilderTriage/temp/";

	/** Name of the FASTA file (inside {@link #TEMP_DIRECTORY}) the TCDB sequences are written to. */
	private static final String TCDB_FASTA_FILE_NAME = "tcdbSEQs.txt";

	/** Protein FASTA file whose sequences are searched against TCDB. */
	private static final String QUERY_FASTA_FILE = "C:/Users/Davide/Downloads/nvulgaris_29421_protein.faa";

	/**
	 * Record stripped from the downloaded TCDB text before parsing.
	 * NOTE(review): looks like a junk/test entry in the remote file - confirm it is still needed.
	 */
	private static final String DISCARDED_TCDB_RECORD = ">gnl|TC-DB|xxxxxx 3.A.1.205.14 \ndsfgdfg";

	/**
	 * Runs the search immediately. Any failure is reported on the standard
	 * error stream and otherwise ignored, so construction itself never throws.
	 */
	public Blast() {

		try {
			performBlast();
		}
		catch (Exception e) {
			// Best effort: callers only expect the constructor to return.
			e.printStackTrace();
		}
	}

	/**
	 * Downloads TCDB, writes it to a FASTA file, loads the query sequences and
	 * launches the similarity search when there is at least one query sequence.
	 *
	 * @throws Exception if downloading, parsing, writing or searching fails
	 */
	private void performBlast() throws Exception {

		System.out.println("INIT BLAST!!");

		String tcdbFastaFile = TEMP_DIRECTORY.concat(TCDB_FASTA_FILE_NAME);

		Map<String, AbstractSequence<?>> tcdbGenes = getTcdbInMapFormat();

		System.out.println("BIOJAVA TCDB!!");

		CreateGenomeFile.buildFastaFile(tcdbFastaFile, tcdbGenes);

		System.out.println("BIOJAVA RENATO!!");

		ConcurrentHashMap<String, AbstractSequence<?>> sequences = new ConcurrentHashMap<>();
		sequences.putAll(FastaReaderHelper.readFastaProteinSequence(new File(QUERY_FASTA_FILE)));

		System.out.println("BLAST!!");

		RunSimilaritySearch similaritySearch = new RunSimilaritySearch(tcdbGenes, 0.1,
				Method.SmithWaterman, sequences, new AtomicBoolean(false), new AtomicInteger(0), new AtomicInteger(0),
				AlignmentScoreType.ALIGNMENT);

		similaritySearch.setTcdbFastaFilePath(tcdbFastaFile);
		similaritySearch.addObserver(this);
		similaritySearch.setCurrentTempFolderDirectory(TEMP_DIRECTORY);

		// The returned alignments are not consumed yet; the search is only executed.
		if (!sequences.isEmpty())
			similaritySearch.runBlastSearch(true);

		System.out.println("BLAST PERFORMED!!");
	}

	/**
	 * Downloads the TCDB FASTA text, removes the {@code <p>}/{@code </p>} markup
	 * and one known bogus record, and parses what is left as protein sequences.
	 *
	 * @return a new mutable map with the entries produced by the FASTA reader
	 * @throws Exception if the download or the FASTA parsing fails
	 */
	public static Map<String, AbstractSequence<?>> getTcdbInMapFormat() throws Exception {

		StringBuilder downloaded = new StringBuilder();

		// try-with-resources: the remote stream and its reader are closed even when reading fails.
		// The text is decoded as UTF-8 so the result no longer depends on the platform default charset.
		try (InputStream remoteStream = new URL(TcdbExplorer.TCDB_FASTA_URL).openStream();
				BufferedReader reader = new BufferedReader(new InputStreamReader(remoteStream, StandardCharsets.UTF_8))) {

			String line;

			// Every line is re-terminated with '\n'; the bogus-record literal below relies on that.
			while ((line = reader.readLine()) != null)
				downloaded.append(line).append('\n');
		}

		String fastaText = downloaded.toString()
				.replace("</p>", "")
				.replace("<p>", "")
				.replace(DISCARDED_TCDB_RECORD, "");

		InputStream fastaStream = new ByteArrayInputStream(fastaText.getBytes(StandardCharsets.UTF_8));

		FastaReader<ProteinSequence,AminoAcidCompound> fastaReader = new FastaReader<ProteinSequence,AminoAcidCompound>(
				fastaStream,
				new GenericFastaHeaderParser<ProteinSequence,AminoAcidCompound>(),
				new ProteinSequenceCreator(AminoAcidCompoundSet.getAminoAcidCompoundSet()));

		Map<String, AbstractSequence<?>> tcdb = new HashMap<>();
		tcdb.putAll(fastaReader.process());

		return tcdb;
	}

	/**
	 * Intentionally empty: notifications sent by the observed search are ignored.
	 */
	@Override
	public void update(Observable arg0, Object arg1) {
		// nothing to do
	}
}
...@@ -348,6 +348,8 @@ public class Compare { ...@@ -348,6 +348,8 @@ public class Compare {
workbook.write(fileOut); workbook.write(fileOut);
fileOut.flush(); fileOut.flush();
fileOut.close(); fileOut.close();
workbook.close();
System.out.println("xlsx containing the comparison results created"); System.out.println("xlsx containing the comparison results created");
......
...@@ -26,13 +26,18 @@ public class GenerateTransportReactions { ...@@ -26,13 +26,18 @@ public class GenerateTransportReactions {
* @param tcdbMetabolites * @param tcdbMetabolites
* @return * @return
*/ */
public static Map<String, Map<String, TcNumberContainer>> generateReactions(Map<String, TcNumberContainer> data, Map<String, TcdbMetabolitesContainer> tcdbMetabolites) { public static Map<String, Map<String, TcNumberContainer>> generateReactions(Map<String, TcNumberContainer> data,
Map<String, TcdbMetabolitesContainer> tcdbMetabolites, Map<String, String> proteinFamilyDescription) {
Map<String, Map<String, TcNumberContainer>> mainMap = new HashMap<>(); Map<String, Map<String, TcNumberContainer>> mainMap = new HashMap<>();
Synonyms dictionary = new Synonyms(); Synonyms dictionary = new Synonyms();
int noMetabolitesNoReactionsCounter = 0; int noMetabolitesNoReactionsCounter = 0;
// Map<String, TcdbMetabolitesContainer> tcdbMetabolites = new HashMap<>();
//
// tcdbMetabolites.put("O51918", tcdbMetabolitessss.get("O51918"));
for(String accession : tcdbMetabolites.keySet()) { for(String accession : tcdbMetabolites.keySet()) {
...@@ -61,9 +66,12 @@ public class GenerateTransportReactions { ...@@ -61,9 +66,12 @@ public class GenerateTransportReactions {
for(int id : tcNumberContainer.getAllReactionsIds()) { for(int id : tcNumberContainer.getAllReactionsIds()) {
ReactionContainer reactionContainer = tcNumberContainer.getReactionContainer(id); ReactionContainer reactionContainer = tcNumberContainer.getReactionContainer(id);
TypeOfTransporter evidence = checkForEvidenceOfTransport(tcdbMetContainer.getDescription(tcNumber), tcNumber); TypeOfTransporter evidence = checkForEvidenceOfTransport(tcdbMetContainer.getDescription(tcNumber), tcNumber);
if(evidence == null && proteinFamilyDescription.containsKey(tcNumber.replaceAll("(\\.\\d+)$", "")))
evidence = checkForEvidenceOfTransport(proteinFamilyDescription.get(tcNumber.replaceAll("(\\.\\d+)$", "")), tcNumber);
// System.out.println(accession); // System.out.println(accession);
Boolean antiportOrSymport = selectMethodOfMetabolitesDistribution(reactionContainer.getReaction(), reactionContainer.getTransportType(), dictionary); Boolean antiportOrSymport = selectMethodOfMetabolitesDistribution(reactionContainer.getReaction(), reactionContainer.getTransportType(), dictionary);
...@@ -83,8 +91,6 @@ public class GenerateTransportReactions { ...@@ -83,8 +91,6 @@ public class GenerateTransportReactions {
// Set<String> metabolites = tcdbMetContainer.getMetabolites(tcNumber); // Set<String> metabolites = tcdbMetContainer.getMetabolites(tcNumber);
if(evidence == null) { if(evidence == null) {
if(reactionContainer.getTransportType().equals(TypeOfTransporter.Uniport) || reactionContainer.getTransportType().equals(TypeOfTransporter.Symport)) if(reactionContainer.getTransportType().equals(TypeOfTransporter.Uniport) || reactionContainer.getTransportType().equals(TypeOfTransporter.Symport))
...@@ -109,7 +115,7 @@ public class GenerateTransportReactions { ...@@ -109,7 +115,7 @@ public class GenerateTransportReactions {
newTcContainer = processAntiportReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession); newTcContainer = processAntiportReactions(reactionContainer, newTcContainer, metabolites, dictionary, tcNumber+" "+accession);
// } // }
else else if(!tcNumberContainer.evidenceTransportTypeExists(evidence))
newTcContainer = correctReaction(tcNumber, reactionContainer.getReaction(), "C-1" ,evidence, reactionContainer.getProperties(), metabolites, newTcContainer); newTcContainer = correctReaction(tcNumber, reactionContainer.getReaction(), "C-1" ,evidence, reactionContainer.getProperties(), metabolites, newTcContainer);
// System.out.println(tcNumber+" "+accession + " " + evidence + " " + reactionContainer.getTransportType() + " " + reactionContainer.getReaction() + "\t" + metabolites); // System.out.println(tcNumber+" "+accession + " " + evidence + " " + reactionContainer.getTransportType() + " " + reactionContainer.getReaction() + "\t" + metabolites);
...@@ -133,7 +139,8 @@ public class GenerateTransportReactions { ...@@ -133,7 +139,8 @@ public class GenerateTransportReactions {
if(newTcContainer.getAllReactionsIds().size() == 0) //reactions with generic reactions such as if(newTcContainer.getAllReactionsIds().size() == 0) //reactions with generic reactions such as
noMetabolitesNoReactionsCounter++; //solute1 + solute2 <-> solute1 + solute2 are also included here because no metabolites where found for them noMetabolitesNoReactionsCounter++; //solute1 + solute2 <-> solute1 + solute2 are also included here because no metabolites where found for them
newTcContainer = filterUnwantedReactions(tcNumber, newTcContainer);
newContainers.put(tcNumber, newTcContainer); newContainers.put(tcNumber, newTcContainer);
} }
...@@ -155,6 +162,24 @@ public class GenerateTransportReactions { ...@@ -155,6 +162,24 @@ public class GenerateTransportReactions {
} }
/**
 * Reduces a container that mixes several transport types to its uniport reactions.
 * <p>
 * When more than one transport type is associated with the container, every
 * reaction whose type is not {@code Uniport} is removed. A container with at
 * most one associated type is returned untouched.
 *
 * @param tcNumber TC number the container belongs to (not used by the filter)
 * @param newTcContainer container to filter; it is modified in place
 * @return the same {@code newTcContainer} instance that was passed in
 */
private static TcNumberContainer filterUnwantedReactions(String tcNumber, TcNumberContainer newTcContainer) {

	Set<TypeOfTransporter> transportTypes = newTcContainer.getAllTransportTypesAssociated();

	if(transportTypes.size() > 1) {

		// Iterate over a snapshot of the ids: removing reactions while walking the
		// container's own id collection would fail with ConcurrentModificationException
		// if getAllReactionsIds() exposes a live view (assumed possible - TODO confirm).
		for(int id : new java.util.ArrayList<Integer>(newTcContainer.getAllReactionsIds())) {

			ReactionContainer reaction = newTcContainer.getReactionContainer(id);

			if(!reaction.getTransportType().equals(TypeOfTransporter.Uniport))
				newTcContainer.removeReaction(id);
		}
	}

	return newTcContainer;
}
/** /**
* Process metabolites associated to the wrong transport types. * Process metabolites associated to the wrong transport types.
* *
...@@ -256,18 +281,26 @@ public class GenerateTransportReactions { ...@@ -256,18 +281,26 @@ public class GenerateTransportReactions {
* @return * @return
*/ */
private static TypeOfTransporter checkForEvidenceOfTransport(String description, String tcNumber) { private static TypeOfTransporter checkForEvidenceOfTransport(String description, String tcNumber) {
if(tcNumber.equals("2.A.1.4.8")) try {
return TypeOfTransporter.Uniport; if(tcNumber.equals("2.A.1.4.8"))
return TypeOfTransporter.Uniport;
if(description.matches("(?i).*uniporte*r*[\\s*\\.*].*"))
return TypeOfTransporter.Uniport; if(description.matches("(?i).*uniporte*r*[\\s*\\.*]*.*"))
return TypeOfTransporter.Uniport;
else if(description.matches("(?i).*symporte*r*[\\s*\\.*].*"))
return TypeOfTransporter.Symport; else if(description.matches("(?i).*symporte*r*[\\s*\\.*]*.*"))
return TypeOfTransporter.Symport;
else if(description.matches("(?i).*antiporte*r*[\\s*\\.*]"))
return TypeOfTransporter.Antiport; else if(description.matches("(?i).*antiporte*r*[\\s*\\.*]*.*"))
return TypeOfTransporter.Antiport;
}
catch (Exception e) {
System.out.println(tcNumber);
System.out.println(description);
e.printStackTrace();
}
return null; return null;
} }
...@@ -745,7 +778,7 @@ public class GenerateTransportReactions { ...@@ -745,7 +778,7 @@ public class GenerateTransportReactions {
reactant = aux[1]; reactant = aux[1];
product = aux[2]; product = aux[2];
Set<String> reactions = getAllPossibleReactions(originalReaction, reactant, product, reactionContainer.getReversible(), tc); Set<String> reactions = getAllPossibleReactions(originalReaction, reactant, product, reactionContainer.isReversible(), tc);
// if(tc.equals("2.A.20.1.2 P43676")) { // if(tc.equals("2.A.20.1.2 P43676")) {
// System.out.println("Areactions " + reactions); // System.out.println("Areactions " + reactions);
...@@ -757,7 +790,7 @@ public class GenerateTransportReactions { ...@@ -757,7 +790,7 @@ public class GenerateTransportReactions {
// System.out.println(react); // System.out.println(react);
String[] newReaction; String[] newReaction;
if(reactionContainer.getReversible()) if(reactionContainer.isReversible())
newReaction = react.split(ReactionContainer.REV_TOKEN); newReaction = react.split(ReactionContainer.REV_TOKEN);
else else
newReaction = react.split(ReactionContainer.IRREV_TOKEN); newReaction = react.split(ReactionContainer.IRREV_TOKEN);
...@@ -785,7 +818,7 @@ public class GenerateTransportReactions { ...@@ -785,7 +818,7 @@ public class GenerateTransportReactions {
// System.out.println(); // System.out.println();
// } // }
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.getReversible()); ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.isReversible());
newReactContainer.setTransportType(reactionContainer.getTransportType()); newReactContainer.setTransportType(reactionContainer.getTransportType());
newReactContainer.setProperties(reactionContainer.getProperties()); newReactContainer.setProperties(reactionContainer.getProperties());
...@@ -1667,7 +1700,7 @@ public class GenerateTransportReactions { ...@@ -1667,7 +1700,7 @@ public class GenerateTransportReactions {
// System.out.println(); // System.out.println();
// } // }
ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.getReversible()); ReactionContainer newReactContainer = new ReactionContainer(result[0], result[1], reactionContainer.isReversible());
newReactContainer.setTransportType(reactionContainer.getTransportType()); newReactContainer.setTransportType(reactionContainer.getTransportType());
newReactContainer.setProperties(reactionContainer.getProperties()); newReactContainer.setProperties(reactionContainer.getProperties());
......
...@@ -39,15 +39,15 @@ public class FindTransportTypes { ...@@ -39,15 +39,15 @@ public class FindTransportTypes {
* @throws IOException * @throws IOException
* @throws Exception * @throws Exception
*/ */
public static Map<String, TcNumberContainer> getAllTCNumbersInformation() throws IOException, Exception{ public static Map<String, TcNumberContainer> getAllTCNumbersInformation(Set<String> toSearch) throws IOException, Exception{
Map<String, TcNumberContainer> data = new HashMap<>(); Map<String, TcNumberContainer> data = new HashMap<>();
// Set<String> toSearch = TcdbExplorer.getTcNumbers(); // Set<String> toSearch = TcdbExplorer.getTcNumbers();
//
Set<String> toSearch = new HashSet<>(); //// Set<String> toSearch = new HashSet<>();
//
toSearch.add("2.A.37"); //// toSearch.add("2.A.37");
List<String> searched = new ArrayList<>(); List<String> searched = new ArrayList<>();
......
...@@ -30,7 +30,7 @@ import tcdb.capsules.TcNumberContainer; ...@@ -30,7 +30,7 @@ import tcdb.capsules.TcNumberContainer;
public class FindTransporters { public class FindTransporters {
public static final int LIMIT = 10; public static final int LIMIT = 10;
private static final String URL = "http://www.tcdb.org/search/result.php?tc=";
public static final List<String> REVERSIBLES = List.of ("⇌", "⇌&nbsp;", "&harr;", "&#8652;", "⇋"); public static final List<String> REVERSIBLES = List.of ("⇌", "⇌&nbsp;", "&harr;", "&#8652;", "⇋");
public static final List<String> IRREVERSIBLES = List.of ("&rarr;", "%u21CC", "%u2192", "--&gt;", "&rightarrow;", "&AElig;", public static final List<String> IRREVERSIBLES = List.of ("&rarr;", "%u21CC", "%u2192", "--&gt;", "&rightarrow;", "&AElig;",
"&agrave;"); "&agrave;");
...@@ -90,7 +90,7 @@ public class FindTransporters { ...@@ -90,7 +90,7 @@ public class FindTransporters {
while(attempt < LIMIT && !found) { while(attempt < LIMIT && !found) {
if(conn.getCodeConnection(URL.concat(tc)) == 200){ if(conn.getCodeConnection(TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc)) == 200){
FilesUtils.webPageSaver(conn.getPageOpenStream(), out); FilesUtils.webPageSaver(conn.getPageOpenStream(), out);
...@@ -122,7 +122,7 @@ public class FindTransporters { ...@@ -122,7 +122,7 @@ public class FindTransporters {
if(attempt == LIMIT && !found){ if(attempt == LIMIT && !found){
System.out.println("results not found for query: " + URL.concat(tc)); System.out.println("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
failed.add(tc); failed.add(tc);
} }
...@@ -973,5 +973,5 @@ public class FindTransporters { ...@@ -973,5 +973,5 @@ public class FindTransporters {
return null; return null;
} }
} }
...@@ -50,10 +50,10 @@ public class ReadFastaTcdb { ...@@ -50,10 +50,10 @@ public class ReadFastaTcdb {
String accession = "", tcNumber = "", organism = "", description = ""; String accession = "", tcNumber = "", organism = "", description = "";
Map<String, Integer> distributions1 = new TreeMap<>(); // Map<String, Integer> distributions1 = new TreeMap<>();
Map<String, Integer> distributions2 = new TreeMap<>(); // Map<String, Integer> distributions2 = new TreeMap<>();
Map<String, Integer> distributions3 = new TreeMap<>(); // Map<String, Integer> distributions3 = new TreeMap<>();
Map<String, Integer> distributions4 = new TreeMap<>(); // Map<String, Integer> distributions4 = new TreeMap<>();
Map<String, Integer> distributions5 = new TreeMap<>(); Map<String, Integer> distributions5 = new TreeMap<>();
...@@ -115,53 +115,53 @@ public class ReadFastaTcdb { ...@@ -115,53 +115,53 @@ public class ReadFastaTcdb {
// System.out.println("tc--" + tcNumber); // System.out.println("tc--" + tcNumber);
String[] newTcNumber = tcNumber.split("\\."); // String[] newTcNumber = tcNumber.split("\\.");
//
// System.out.println(newTcNumber.length); // // System.out.println(newTcNumber.length);
//
String newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]).concat(".").concat(newTcNumber[3]); // String newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]).concat(".").concat(newTcNumber[3]);
//
if(distributions4.containsKey(newTc)) { // if(distributions4.containsKey(newTc)) {
//
int count = distributions4.get(newTc); // int count = distributions4.get(newTc);
distributions4.put(newTc, count+1); // distributions4.put(newTc, count+1);
} // }
//
else // else
distributions4.put(newTc, 1); // distributions4.put(newTc, 1);
//
newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]); // newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]).concat(".").concat(newTcNumber[2]);
//
if(distributions3.containsKey(newTc)) { // if(distributions3.containsKey(newTc)) {
//
int count = distributions3.get(newTc); // int count = distributions3.get(newTc);
distributions3.put(newTc, count+1); // distributions3.put(newTc, count+1);
} // }
//
else // else
distributions3.put(newTc, 1); // distributions3.put(newTc, 1);
//
newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]); // newTc = newTcNumber[0].concat(".").concat(newTcNumber[1]);
//
if(distributions2.containsKey(newTc)) { // if(distributions2.containsKey(newTc)) {
//
int count = distributions2.get(newTc); // int count = distributions2.get(newTc);
distributions2.put(newTc, count+1); // distributions2.put(newTc, count+1);
} // }
//
else // else
distributions2.put(newTc, 1); // distributions2.put(newTc, 1);
//
newTc = newTcNumber[0]; // newTc = newTcNumber[0];
//
if(distributions1.containsKey(newTc)) { // if(distributions1.containsKey(newTc)) {
//
int count = distributions1.get(newTc); // int count = distributions1.get(newTc);
distributions1.put(newTc, count+1); // distributions1.put(newTc, count+1);
} // }