Git Lab CI for docker build enabled! You can enable it using .gitlab-ci.yml in your project. Check file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 6e8ece7c authored by Davide Lagoa's avatar Davide Lagoa
Browse files

research failed queries tcdb

parent e877eedb
......@@ -34,8 +34,9 @@ import pt.uminho.ceb.biosystems.transyt.utilities.files.JSONFilesUtils;
*/
public class FindTransporters {
public static final int LIMIT = 10;
public static final int LIMIT = 5;
public static final int ALL_SEARCH_LIMIT = 2;
public static final List<String> REVERSIBLES = List.of ("⇌", "⇌&nbsp;", "&harr;", "&#8652;", "⇋");
public static final List<String> IRREVERSIBLES = List.of ("&rarr;", "%u21CC", "%u2192", "--&gt;", "&rightarrow;", "&AElig;",
"&agrave;");
......@@ -45,16 +46,16 @@ public class FindTransporters {
public static final List<String> IRREVERSIBLES_IMAGE = List.of ("<img src='arrows/ATP.gif' alt='' />", "<img src='pmf.gif' alt='' />",
"<img src='../../search/pmf.gif' alt='' />", "<img src='arrows/PtsIH.gif' alt='' />",
"<img src='atpgtppmf.gif' alt='' />", "<sub><img src='../../images/upload/eq.gif' alt='' width='47' height='25' />");
public static final List<String> PMF = List.of ("<img src='../../search/pmf.gif' alt='' />", "<img src='atpgtppmf.gif' alt='' />");
public static final List<String> ATP = List.of ("<img src='atpgtppmf.gif' alt='' />");
public static final List<String> GTP = List.of ("<img src='atpgtppmf.gif' alt='' />");
public static final List<String> GET = List.of ("<sub><img src='../../images/upload/eq.gif' alt='' width='47' height='25' />");
public static final String PATH = FilesUtils.getBackupFilesDirectory().concat("TCFilesBackup");
private static final Logger logger = LoggerFactory.getLogger(FindTransporters.class);
/**
* Method to store the information about every TC number (*.*.* format) retrieved from TCDB.
*
......@@ -65,93 +66,104 @@ public class FindTransporters {
public static void saveAllTCFamiliesInformation(Set<String> toSearch) throws IOException, Exception{
Map<String, TcNumberContainer> data = new HashMap<>();
File directory = new File(PATH);
if(!directory.exists())
directory.mkdir();
String path = PATH.concat(FilesUtils.generateFolderName("/version_"));
directory = new File(path);
if(!directory.exists())
directory.mkdir();
// Set<String> toSearch = TcdbExplorer.getTcNumbers();
// Set<String> toSearch = new HashSet<>();
//
// toSearch.add("1.A.6");
List<String> searched = new ArrayList<>();
List<String> failed = new ArrayList<>();
logger.info("Searching family specific information...");
int attempt = 0;
int lastProgress = -1;
int allAttempt = 0;
boolean continueSearch = true;
for(String tc : toSearch) {
while(continueSearch) {
try {
OutputStream out = new FileOutputStream(path.concat(tc).concat(".txt"));
int lastProgress = -1;
LinkConnection conn = new LinkConnection();
for(String tc : toSearch) {
boolean found = false;
attempt = 0;
try {
OutputStream out = new FileOutputStream(path.concat(tc).concat(".txt"));
while(attempt < LIMIT && !found) {
LinkConnection conn = new LinkConnection();
if(conn.getCodeConnection(TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc)) == 200){
FilesUtils.webPageSaver(conn.getPageOpenStream(), out);
boolean found = false;
attempt = 0;
// data.put(tc, getTcContainer(conn.getPage(), tc, data));
while(attempt < LIMIT && !found) {
data.putAll(getTcContainer(conn.getPage(), tc));
if(conn.getCodeConnection(TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc)) == 200){
searched.add(tc);
FilesUtils.webPageSaver(conn.getPageOpenStream(), out);
found = true;
// data.put(tc, getTcContainer(conn.getPage(), tc, data));
TimeUnit.MILLISECONDS.sleep(500);
data.putAll(getTcContainer(conn.getPage(), tc));
}
else {
attempt++;
TimeUnit.SECONDS.sleep(10);
}
searched.add(tc);
}
found = true;
TimeUnit.MILLISECONDS.sleep(500);
}
else {
attempt++;
TimeUnit.SECONDS.sleep(10);
}
}
int progress = ((searched.size()+failed.size())*100)/toSearch.size();
int progress = ((searched.size()+failed.size())*100)/toSearch.size();
if(progress > lastProgress){
if(progress > lastProgress){
lastProgress = progress;
String message = progress + " % search complete";
logger.info(message);
}
lastProgress = progress;
String message = progress + " % search complete";
logger.info(message);
}
if(attempt == LIMIT && !found){
if(attempt == LIMIT && !found){
logger.warn("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
logger.warn("results not found for query: " + TcdbExplorer.TCDB_TCNUMBER_URL.concat(tc));
failed.add(tc);
}
}
catch (Exception e) {
failed.add(tc);
logger.trace("StackTrace {}",e);
}
}
catch (Exception e) {
failed.add(tc);
logger.trace("StackTrace {}",e);
}
if(failed.size() > 0 && allAttempt < ALL_SEARCH_LIMIT) {
allAttempt++;
toSearch = new HashSet<>(failed);
failed = new ArrayList<>();
logger.info("Retrying search of previously failed queries. Attempt nr {}", allAttempt);
}
else {
continueSearch = false;
}
}
if(failed.size() > 0)
logger.warn("The following queries failed: {}", failed.toString());
JSONFilesUtils.writeJSONtcFamilyReactions(data);
}
......@@ -169,7 +181,7 @@ public class FindTransporters {
TcNumberContainer container = new TcNumberContainer();
String html;
String originalTC = tc;
String family = "";
......@@ -230,7 +242,7 @@ public class FindTransporters {
}
}
// System.out.println(expectNextInput);
// System.out.println(expectNextInput);
if((expectReaction || expectNextInput) && !html.contains("1.B.20#ref309\">Poole <em>et al</em>., 1988</a>)") && !html.contains("--&gt; 4 TMSs --&gt; 8 TMSs --&gt; 7 TMSs --&gt;") && !html.contains("&bull")
&& !html.contains("to give 10 TMS proteins: 2 &rarr; 4 &rarr; 5 &rarr; 10") && !html.contains("(<a class=\"reflink\" href=\"/search/result.php?tc=3.E.1#ref2025\">Royant <em>et al</em>., 2001</a>)")) { //hard coded! sorry
......@@ -253,26 +265,26 @@ public class FindTransporters {
}
else {
TypeOfTransporter type;
type = findTypeOfTransport2(reaction, tc);
if(type.equals(TypeOfTransporter.Default)) {
reaction.setProduct(reaction.getProduct().concat(" (out)"));
reaction.setReactant(reaction.getReactant().concat(" (in)"));
reaction.addProperty("reaction string as retrieved from TCDB", "no");
type = TypeOfTransporter.Uniport;
}
reaction.setTransportType(type);
container.addReaction(reaction);
}
......@@ -299,7 +311,7 @@ public class FindTransporters {
}
}
for(String key : tcMap.keySet()) {
TcNumberContainer newContainer = tcMap.get(key);
......@@ -321,7 +333,7 @@ public class FindTransporters {
tcMap.put(key, newContainer);
}
return tcMap;
}
......@@ -490,15 +502,15 @@ public class FindTransporters {
return transport;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Check type of transport based on the reaction.
*
......@@ -512,38 +524,38 @@ public class FindTransporters {
String reactant = reaction.getReactant();
String product = reaction.getProduct();
String lowCaseReactant = reaction.getReactant().toLowerCase();
String lowCaseProduct = reaction.getProduct().toLowerCase();
if(tc.matches("4\\.A\\..+"))
return TypeOfTransporter.PEPdependent;
else if(reactant.contains("ATP") && reactant.contains("Coenzymes A"))
return TypeOfTransporter.BiochemicalCoA;
else if(reactant.contains("ATP"))
return TypeOfTransporter.BiochemicalATP;
else if(reactant.contains("NADH"))
return TypeOfTransporter.RedoxNADH;
else if((lowCaseReactant.contains("reduced") || lowCaseReactant.contains("oxidized"))
&& (lowCaseProduct.contains("reduced") || lowCaseProduct.contains("oxidized")))
return TypeOfTransporter.Redox;
Pair<Set<String>, Set<String>> reactants = compartmentsParser(reactant);
Pair<Set<String>, Set<String>> products = compartmentsParser(product);
Set<String> reactantsMetab = reactants.getA();
Set<String> reactantsComp = reactants.getB();
Set<String> productsMetab = products.getA();
Set<String> productsComp = products.getB();
boolean one = false;
boolean ol = false;
boolean cyt = false;
for(String word : reactantsMetab) {
if(word.matches(".+one"))
one = true;
......@@ -552,7 +564,7 @@ public class FindTransporters {
else if(word.matches("\\d*cyt") || word.matches("cytochrome(s)*"))
cyt = true;
}
for(String word : productsMetab) {
if(word.matches(".+one"))
one = true;
......@@ -561,44 +573,44 @@ public class FindTransporters {
else if(word.matches("\\d*cyt\\w*") || word.matches("\\d*cytochrome(s)*"))
cyt = true;
}
// System.out.println(reactantsMetab);
// System.out.println(reactantsComp);
// System.out.println(productsMetab);
// System.out.println(productsComp);
// System.out.println(reactantsMetab);
// System.out.println(reactantsComp);
// System.out.println(productsMetab);
// System.out.println(productsComp);
if(cyt)
return TypeOfTransporter.Cytochrome;
if(one && ol)
return TypeOfTransporter.RedoxQuinol;
if(reactantsComp.isEmpty() && productsComp.isEmpty())
return TypeOfTransporter.Default;
if(reactantsMetab.containsAll(productsMetab) && productsMetab.containsAll(reactantsMetab)) {
if((reactantsMetab.size() == 1 && productsMetab.size() == 1))
return TypeOfTransporter.Uniport;
else if((reactantsMetab.size() > reactantsComp.size()) && reactantsComp.size() == 1)
return TypeOfTransporter.Symport;
// else if(reactantsComp.size() == 1 && reactantsMetab.size() == 1)
// return TypeOfTransporter.Uniport;
// else if(reactantsComp.size() == 1 && reactantsMetab.size() == 1)
// return TypeOfTransporter.Uniport;
else if(reactantsComp.size() != productsComp.size())
return null;
return TypeOfTransporter.Antiport;
}
else
return TypeOfTransporter.Biochemical;
}
/**
* Auxiliary method used when finding the transport types.
*
......@@ -606,68 +618,68 @@ public class FindTransporters {
* @return
*/
private static Pair<Set<String>, Set<String>> compartmentsParser(String query) {

	// Splits one side of a reaction string into two sets: the metabolite names
	// (lower-cased, stripped of every non-alphanumeric character) and the
	// compartment annotations found between parentheses (lower-cased, with the
	// closing parenthesis removed). The pair returned is (metabolites, compartments).

	// "in the " is dropped first; the substances are then separated by " + ".
	String[] substances = query.replaceAll("in the ", "").split(" \\+ ");

	Set<String> compartments = new HashSet<>();
	Set<String> metabolites = new HashSet<>();

	try {
		for(String substance : substances) {

			// Each " (" starts a new token, so a token carrying ")" is the content of a parenthesis.
			String[] words = substance.split(" \\(");

			for(int j = 0; j < words.length; j++) {

				String word = words[j];

				// A first token that itself opens with "(" is always stored as a metabolite.
				boolean leadingParenthesis = j == 0 && word.startsWith("(");

				if(leadingParenthesis || !word.contains(")")) {
					metabolites.add(word.replaceAll("[^A-Za-z0-9]", "").toLowerCase());
				}
				else if(word.contains("or ")) {
					// Alternatives ("... or ...") are deliberately ignored: they are added
					// neither to the metabolites nor to the compartments.
					continue;
				}
				else if(word.contains("and ")) {
					// NOTE(review): the split on " (" has usually consumed the opening
					// parenthesis already, so this replace only matches when "(and " is not
					// preceded by a space; otherwise the literal "and" remains part of the
					// stored name. Behaviour kept as is - confirm whether that is intended.
					word = word.replace("(and ", "");
					metabolites.add(word.replaceAll("[^A-Za-z0-9]", "").toLowerCase());
				}
				else {
					compartments.add(word.replaceAll("\\)", "").toLowerCase());
				}
			}
		}
	}
	catch (Exception e) {
		// Best effort: whatever was parsed before the failure is still returned.
		// Reported through the class logger instead of printStackTrace().
		logger.error("Error while parsing compartments of query: {}", query, e);
	}

	return new Pair<>(metabolites, compartments);
}
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Get the reaction content in the correct format.
......@@ -678,101 +690,101 @@ public class FindTransporters {
private static ReactionContainer getReactionFromText(String html, String text, String tc) {
try {
//
// System.out.println(text);
// System.out.println(html);
//
// System.out.println(text);
// System.out.println(html);
boolean original = true;
boolean pmf = false;
boolean gtp = false;
boolean energy = false;
// if(html.contains("+ (pmf)")) {
// html = html.replaceAll("\\+*\\s*(pmf)", "");
// text = text.replaceAll("\\+*\\s*(pmf)", "");
//
// pmf = true;
// original = false;
// }
// if(html.contains("and other")) {
//
// html = html.replaceAll("and other", "+");
// text = text.replaceAll("and other", "+");
//
// original = false;
// }
// if(html.contains("+ (pmf)")) {
// html = html.replaceAll("\\+*\\s*(pmf)", "");
// text = text.replaceAll("\\+*\\s*(pmf)", "");
//
// pmf = true;
// original = false;
// }
// if(html.contains("and other")) {
//
// html = html.replaceAll("and other", "+");
// text = text.replaceAll("and other", "+");
//
// original = false;
// }
if(text.matches("\\d\\.\\s*.+")){
html = html.replaceAll("\\d\\.+", "");
text = text.replaceAll("\\d\\.+", "");
original = false;
}
if(html.contains("(outer surface of outer membrane)")) {
html = html.replaceAll("\\(outer surface of outer membrane\\)", "(out)");
text = text.replaceAll("\\(outer surface of outer membrane\\)", "(out)");
original = false;
}
if(html.contains("translocation across")) {