GitLab CI for Docker builds is enabled! You can enable it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit dffb8f03 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

minor improvements, modelseedAPI added, conjugated pairs exception created

parent c99b7898
package pt.uminho.ceb.biosystems.transyt.scraper.APIs;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.transyt.utilities.connection.LinkConnection;
import pt.uminho.ceb.biosystems.transyt.utilities.connection.TcdbExplorer;
import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
/**
 * Static helpers that fetch the ModelSEED biochemistry TSV files (compounds and reactions)
 * from the ModelSEEDDatabase GitHub repository, keep a local copy, and parse them into
 * simple identifier -> value maps.
 *
 * When a fresh download is not requested, or cannot be completed, the last locally saved
 * copy (as recorded by {@code FilesUtils.saveLastKnownVersion}) is read instead.
 */
public class ModelSEEDAPI {

	public static final String BASE_URL_REACTIONS = "https://raw.githubusercontent.com/ModelSEED/ModelSEEDDatabase/master/Biochemistry/reactions.tsv";
	public static final String BASE_URL_COMPOUNDS = "https://raw.githubusercontent.com/ModelSEED/ModelSEEDDatabase/master/Biochemistry/compounds.tsv";

	// NOTE(review): LIMIT and BATCH_SIZE are not used anywhere in this class; they are kept only
	// because they are public. The original BATCH_SIZE comment referred to KEGG, so both look
	// like leftovers copied from another API class - confirm before relying on them.
	public static final int LIMIT = 5;
	public static final int BATCH_SIZE = 10;

	// The "tcdb" prefix in these file names is kept as-is: existing installations already have
	// files stored under these paths.
	private static final String PATH_LAST_KNOWN_VERSION_MODELSEED_REACTIONS = FilesUtils.getModelseedReactionsFilesDirectory().concat("tcdbLastKnownVersion.log");
	private static final String PATH_LAST_KNOWN_VERSION_MODELSEED_COMPOUNDS = FilesUtils.getModelseedCompoundsFilesDirectory().concat("tcdbLastKnownVersion.log");

	private static final String MODELSEED_REACTIONS_FILE_NAME = "modelseed_reactions";
	private static final String MODELSEED_COMPOUNDS_FILE_NAME = "modelseed_compounds";

	/** Zero-based column of compounds.tsv holding the obsolete flag (0 = not obsolete). */
	private static final int COMPOUNDS_OBSOLETE_COLUMN = 9;
	/** Zero-based column of compounds.tsv whose content is returned as the map value. */
	private static final int COMPOUNDS_VALUE_COLUMN = 18;
	/** Zero-based column of reactions.tsv holding the obsolete flag (0 = not obsolete). */
	private static final int REACTIONS_OBSOLETE_COLUMN = 18;
	/** Zero-based column of reactions.tsv whose content is returned as the map value. */
	private static final int REACTIONS_VALUE_COLUMN = 6;

	private static final int HTTP_OK = 200;

	private static final Logger logger = LoggerFactory.getLogger(ModelSEEDAPI.class);

	/**
	 * Reads the ModelSEED compounds file and returns, for every non-obsolete compound,
	 * the compound identifier (column 0) mapped to the content of column 18.
	 * (The callers visible in this project split that value on '|' and treat the pieces
	 * as aliases.)
	 *
	 * The method name keeps its historical spelling ("Githu") because it is public API.
	 *
	 * @param useCache when {@code true} no download is attempted and the last saved file is read
	 * @return map of compound identifier to the value of column 18; never {@code null}
	 * @throws Exception if neither a fresh download nor a previously saved file can be read
	 */
	public static Map<String, String> getModelseedCompoundsFromGithu(boolean useCache) throws Exception {

		logger.info("Downloading latest ModelSEED compounds file from: {}", BASE_URL_COMPOUNDS);

		Map<String, String> res;

		// try-with-resources: the reader is released even when reading fails half-way
		try (BufferedReader reader = getLatestCompoundsListFile(useCache)) {
			res = parseTsv(reader, COMPOUNDS_OBSOLETE_COLUMN, COMPOUNDS_VALUE_COLUMN, "compounds");
		}

		logger.info("ModelSEED compounds download complete!");

		return res;
	}

	/**
	 * Reads the ModelSEED reactions file and returns, for every non-obsolete reaction,
	 * the reaction identifier (column 0) mapped to the content of column 6.
	 *
	 * @param useCache when {@code true} no download is attempted and the last saved file is read
	 * @return map of reaction identifier to the value of column 6; never {@code null}
	 * @throws Exception if neither a fresh download nor a previously saved file can be read
	 */
	public static Map<String, String> getModelseedReaction(boolean useCache) throws Exception {

		logger.info("Downloading latest ModelSEED reactions file from: {}", BASE_URL_REACTIONS);

		Map<String, String> res;

		try (BufferedReader reader = getLatestReactionsListFile(useCache)) {
			res = parseTsv(reader, REACTIONS_OBSOLETE_COLUMN, REACTIONS_VALUE_COLUMN, "reactions");
		}

		logger.info("ModelSEED reactions download complete!");

		return res;
	}

	/**
	 * Parses a tab-separated stream: the first line is skipped as a header, and every following
	 * row whose {@code obsoleteColumn} parses to 0 contributes {@code column 0 -> valueColumn}.
	 * Rows that are too short or whose obsolete flag is not an integer are skipped and logged.
	 *
	 * @param reader         source of the TSV content; not closed by this method
	 * @param obsoleteColumn zero-based index of the obsolete flag
	 * @param valueColumn    zero-based index of the column used as map value
	 * @param label          short name of the file, used in log messages only
	 * @return the parsed entries
	 * @throws IOException if reading from {@code reader} fails
	 */
	private static Map<String, String> parseTsv(BufferedReader reader, int obsoleteColumn, int valueColumn, String label) throws IOException {

		Map<String, String> res = new HashMap<>();

		int requiredColumns = Math.max(obsoleteColumn, valueColumn) + 1;
		int lineNumber = 0;
		String line;

		while ((line = reader.readLine()) != null) {

			lineNumber++;

			if (lineNumber == 1)	// header row
				continue;

			// String.split drops trailing empty fields, so short rows are expected and skipped
			String[] data = line.split("\t");

			if (data.length < requiredColumns) {
				logger.warn("Skipping line {} of ModelSEED {} file: found {} columns, expected at least {}",
						lineNumber, label, data.length, requiredColumns);
				continue;
			}

			try {
				if (Integer.parseInt(data[obsoleteColumn]) == 0)	//check if not obsolete
					res.put(data[0], data[valueColumn]);
			}
			catch (NumberFormatException e) {
				logger.warn("Skipping line {} of ModelSEED {} file: obsolete flag '{}' is not an integer",
						lineNumber, label, data[obsoleteColumn]);
			}
		}

		return res;
	}

	/**
	 * Method to get the latest compounds file from ModelSEED's repository, unless cache is requested!
	 *
	 * @param useCache when {@code true} the last saved file is used without contacting the server
	 * @return reader over the compounds TSV content
	 * @throws InterruptedException
	 * @throws FileNotFoundException if the fallback file does not exist
	 */
	private static BufferedReader getLatestCompoundsListFile(boolean useCache) throws InterruptedException, FileNotFoundException {

		return getLatestListFile(useCache, BASE_URL_COMPOUNDS, FilesUtils.getModelseedCompoundsFilesDirectory(),
				MODELSEED_COMPOUNDS_FILE_NAME, PATH_LAST_KNOWN_VERSION_MODELSEED_COMPOUNDS);
	}

	/**
	 * Method to get the latest reactions file from ModelSEED's repository, unless cache is requested!
	 *
	 * @param useCache when {@code true} the last saved file is used without contacting the server
	 * @return reader over the reactions TSV content
	 * @throws InterruptedException
	 * @throws FileNotFoundException if the fallback file does not exist
	 */
	private static BufferedReader getLatestReactionsListFile(boolean useCache) throws InterruptedException, FileNotFoundException {

		return getLatestListFile(useCache, BASE_URL_REACTIONS, FilesUtils.getModelseedReactionsFilesDirectory(),
				MODELSEED_REACTIONS_FILE_NAME, PATH_LAST_KNOWN_VERSION_MODELSEED_REACTIONS);
	}

	/**
	 * Shared implementation for compounds and reactions: tries to download {@code url} into a new
	 * file inside {@code directory} and records that file as the last known version; on any
	 * failure (or when {@code useCache} is set) falls back to the last known version.
	 *
	 * @param useCache             skip the download attempt
	 * @param url                  remote location of the TSV file
	 * @param directory            local directory where the downloaded copy is written
	 * @param fileBaseName         base name handed to {@code FilesUtils.generateFileName}
	 * @param lastKnownVersionPath file in which the path of the latest saved copy is recorded
	 * @return reader over the TSV content
	 * @throws InterruptedException
	 * @throws FileNotFoundException if the fallback file does not exist
	 */
	private static BufferedReader getLatestListFile(boolean useCache, String url, String directory,
			String fileBaseName, String lastKnownVersionPath) throws InterruptedException, FileNotFoundException {

		if (!useCache) {
			try {
				LinkConnection conn = new LinkConnection();
				int code = conn.getCodeConnection(url);

				if (code == HTTP_OK) {

					// The output file is only created once the server answered, so a failed
					// request no longer leaves an empty file behind.
					String filePath = directory.concat(FilesUtils.generateFileName(fileBaseName, ".tsv"));

					// The stream is closed on every path; previously it leaked on success.
					try (OutputStream out = new FileOutputStream(filePath)) {
						conn.webPageSaver(conn.getPageOpenStream(), out);
					}

					FilesUtils.saveLastKnownVersion(lastKnownVersionPath, filePath);

					return conn.getPage();
				}

				logger.warn("Request to {} returned HTTP code {}. Using the last known local version instead.", url, code);
			}
			catch (Exception e) {

				if (e instanceof InterruptedException)
					Thread.currentThread().interrupt();	// keep the interrupt visible to callers

				logger.error("Unable to download {}. Using the last known local version instead.", url, e);
			}
		}

		String lastFilePath = FilesUtils.getLastKnownVersion(lastKnownVersionPath);

		return new BufferedReader(new FileReader(lastFilePath));
	}
}
......@@ -35,6 +35,7 @@ public class FindTransporters {
public static final int LIMIT = 5;
public static final int ALL_SEARCH_LIMIT = 2;
public static final int MAX_REACTANT_CHAR = 140;
public static final List<String> REVERSIBLES = List.of ("⇌", "⇌&nbsp;", "&harr;", "&#8652;", "⇋");
public static final List<String> IRREVERSIBLES = List.of ("&rarr;", "%u21CC", "%u2192", "--&gt;", "&rightarrow;", "&AElig;",
......@@ -89,7 +90,7 @@ public class FindTransporters {
boolean continueSearch = true;
while(continueSearch) {
int lastProgress = -1;
for(String tc : toSearch) {
......@@ -162,7 +163,7 @@ public class FindTransporters {
if(failed.size() > 0)
logger.warn("The following queries failed: {}", failed.toString());
JSONFilesUtils.writeJSONtcFamilyReactions(data);
}
......@@ -283,7 +284,11 @@ public class FindTransporters {
reaction.setProduct(reaction.getProduct().replace("S ", "Solute ")); //avoids the confusion of the algorithm with Sulfur
reaction.setReactant(reaction.getReactant().replace("S ", "Solute "));
container.addReaction(reaction);
if((reaction.getReactant().length() > 0 && reaction.getReactant().length() < MAX_REACTANT_CHAR) && //attempt to ignore false positives
(reaction.getProduct().length() > 0 && reaction.getProduct().length() < MAX_REACTANT_CHAR)) {
container.addReaction(reaction);
}
}
}
......@@ -381,21 +386,21 @@ public class FindTransporters {
if(reactantsComp.isEmpty() && productsComp.isEmpty())
return TypeOfTransporter.Default;
if(reactantsMetab.contains("hv") || reactantsMetab.contains("light") ||
reactantsMetab.contains("photon") || reactantsMetab.contains("hnu"))
return TypeOfTransporter.Light;
if(reactantsMetab.containsAll(productsMetab) && productsMetab.containsAll(reactantsMetab)) {
if((reactantsMetab.size() == 1 && productsMetab.size() == 1))
return TypeOfTransporter.Uniport;
else if((reactantsMetab.size() > reactantsComp.size()) && reactantsComp.size() == 1) {
if(reactantsMetab.size() == 2 && reaction.getReactant().matches(".*\\s+[^\\+]*.*\\(\\s*and.*")) //false positive. Ex: "Ca2+ (and other cations) (out)"
return TypeOfTransporter.Uniport;
return TypeOfTransporter.Symport;
}
// else if(reactantsComp.size() == 1 && reactantsMetab.size() == 1)
......@@ -411,7 +416,7 @@ public class FindTransporters {
return TypeOfTransporter.Biochemical;
}
/**
* Check type of transport based on the reaction (composed of metacyc ids only).
*
......@@ -422,7 +427,7 @@ public class FindTransporters {
String reactant = reaction.getReactant();
String product = reaction.getProduct();
if(reaction.getReaction().contains(ReactionContainer.MIDDLE_COMPARTMENT_TOKEN))
return TypeOfTransporter.Biochemical;
......@@ -432,10 +437,10 @@ public class FindTransporters {
else if(reactant.contains(":ATP") && reactant.contains(":CO-A"))
return TypeOfTransporter.BiochemicalCoA;
else if(reactant.contains(":ATP") || reactant.contains(":GTP"))
else if(reactant.contains(":ATP") && !product.contains(":ATP"))
return TypeOfTransporter.BiochemicalATP;
else if(reactant.contains(":GTP"))
else if(reactant.contains(":GTP") && !product.contains(":GTP"))
return TypeOfTransporter.BiochemicalGTP;
else if(reactant.contains(":NADH")) {
......@@ -456,11 +461,11 @@ public class FindTransporters {
if(reactantsComp.isEmpty() && productsComp.isEmpty())
return TypeOfTransporter.Default;
if(reactantsMetab.contains(":hv") || reactantsMetab.contains(":light") ||
reactantsMetab.contains(":photon") || reactantsMetab.contains(":hnu"))
return TypeOfTransporter.Light;
if(reactantsMetab.containsAll(productsMetab) && productsMetab.containsAll(reactantsMetab)) {
if((reactantsMetab.size() == 1 && productsMetab.size() == 1))
......@@ -498,7 +503,7 @@ public class FindTransporters {
// System.out.println(substances[i]);
String[] words = substances[i].split(" \\(");
if(words.length == 1 && substances[i].contains("+(")) {
substances[i] = substances[i].replace("+(", "+ (");
words = substances[i].split(" \\(");
......
......@@ -20,9 +20,9 @@ import pt.uminho.ceb.biosystems.transyt.utilities.files.FilesUtils;
public class ProcessCompartments {
private static final String DIC_PATH = FilesUtils.getDictionatiesAndConfigurationsDirectory().concat("dictionaryComp.txt");
private static final List<String> EXCEPTIONS = List.of ("5.B.9", "5.B.8", "5.B.2", "5.B.1", "3.A.6", "5.B.12", "3.D.3",
private static final List<String> EXCEPTIONS = List.of ("5.B.9", "5.B.8", "5.B.1", "3.A.6", "5.B.12", //this should go to the exceptions file
"3.A.21", "1.A.25", "2.A.9", "2.A.64", "1.E.20", "3.D.10", "1.B.53", "9.B.35", "9.A.41", "1.A.17", "1.A.3",
"3.D.9", "4.E.1","1.B.8","2.A.23","2.A.1", "5.A.3", "9.B.16", "3.D.4", "3.D.6");
"3.D.9", "4.E.1","1.B.8","2.A.1", "9.B.16");
private static final Logger logger = LoggerFactory.getLogger(ProcessCompartments.class);
......@@ -53,7 +53,7 @@ public class ProcessCompartments {
ReactionContainer reactContainer = tcContainer.getReactionContainer(id);
// if(tc.equals("2.A.39"))
// if(tc.equals("1.A.17"))
// System.out.println(reactContainer.getReaction());
//
String reactant = reactContainer.getReactant().replaceAll("in the ", "").replaceAll("\\(non-selective\\)\\s*", "").replaceAll("\\(and other compounds\\)\\s*", "and compounds")
......@@ -76,7 +76,7 @@ public class ProcessCompartments {
reactant = reactant.replaceAll("\\(periplasm of Gram-negative bacteria\\)", "");
// if(tc.equals("3.A.5"))
// if(tc.equals("1.A.17"))
// System.out.println(product);
if(EXCEPTIONS.contains(tc)) {
......@@ -122,16 +122,21 @@ public class ProcessCompartments {
// reactContainer.replaceReactant(reactant); //delete after tests
// reactContainer.replaceProduct(product); //delete after tests
reaction = findCompartmentsRelativePosition(reactant, product, compartmentsList);
reactant = reaction[0];
product = reaction[1];
try {
reaction = findCompartmentsRelativePosition(reactant, product, compartmentsList);
reactant = reaction[0];
product = reaction[1];
// if(tc.equals("2.A.39"))
// System.out.println(reactant + "\t" +product);
reactContainer.replaceReactant(reactant);
reactContainer.replaceProduct(product);
reactContainer.replaceReactant(reactant);
reactContainer.replaceProduct(product);
} catch (Exception e) {
System.out.println(tc);
e.printStackTrace();
}
}
......@@ -237,11 +242,6 @@ public class ProcessCompartments {
reactant = reactant.replaceAll("e.g., in reduced cytochrome periplasm", "in").replace("extracellular", "out");
product = product.replace("extracellular", "out");
}
else if(tc.equals("5.B.2") && id == 0) {
reactant = reactant.replaceAll("\\(oxidized\\) ", "").replaceAll("cytosol", "in").replaceAll("synaptic vesicle", "out");
product = product.replaceAll("\\(reduced\\) ", "").replaceAll("cytosol", "in").replaceAll("synaptic vesicle", "out");
}
else if(tc.equals("5.B.1") && id == 1) {
product = product.replaceAll("\\(superoxide\\) ", "");
......@@ -256,16 +256,6 @@ public class ProcessCompartments {
reactant = reactant.replaceAll("periplasmic electron donor", "out");
product = product.replaceAll("cytoplasmic sulfite reductase, DsrABC", "in");
}
else if(tc.equals("3.D.3") && id == 0) {
reactant = reactant.replaceAll("\\(QH2\\) ", "").replaceAll("\\(ox\\) ", "");
product = product.replaceAll("\\(Q\\) ", "").replaceAll("\\(red\\) ", "");
}
else if(tc.equals("3.D.4") && id == 0) {
reactant = reactant.replaceAll("\\(red\\) ", "");
product = product.replaceAll("\\(ox\\) ", "");
}
else if(tc.equals("3.A.21") && id == 0) {
//membrane inserted -> endoplasmatic reticulum
......@@ -308,7 +298,7 @@ public class ProcessCompartments {
else if(tc.equals("1.A.17") && id == 1) { //this exception should not be necessary, something is happening while retrieving the reaction. Check this later
reactant = reactant.replaceAll("\\(e.g., Ca2\\+\\) ", "");
product = product.concat("(in)");
product = product.concat(" " + ReactionContainer.INTERIOR_COMPARTMENT_TOKEN);
}
else if(tc.equals("1.A.3") && id == 0) {
......@@ -329,10 +319,6 @@ public class ProcessCompartments {
product = product.replaceAll("intermembrane space", "in");
}
else if(tc.equals("2.A.23") && (id == 0 || id == 1)) {
reactant = reactant.replaceAll("\\(dicarboxylate or amino acid\\) ", "");
}
else if(tc.equals("2.A.1") && id == 2) {
product = product.replaceAll("\\(S1 may be H\\+ or a solute\\)", "");
......@@ -342,20 +328,12 @@ public class ProcessCompartments {
reactant = reactant.replaceAll("\\(NO3\\-\\) ", "");
product = product.replaceAll("\\(NO2\\-\\) ", "");
}
else if(tc.equals("5.A.3") && id == 1) {
reactant = reactant.replaceAll("\\(HCO2\\-\\) ", "");
}
else if(tc.equals("9.B.16") && id == 0) {
reactant = reactant.replaceAll("cytoplasm", "in");
product = product.replaceAll("out or cytoplasm of an adjacent cell", "out");
}
else if(tc.equals("3.D.6") && id == 0) {
reactant = reactant.replaceAll("\\(H\\+ or Na\\+\\)", "H\\+ or Na\\+");
product = product.replaceAll("\\(H\\+ or Na\\+\\)", "H\\+ or Na\\+");
}
else
return null;
......
......@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.transyt.scraper.APIs.KeggAPI;
import pt.uminho.ceb.biosystems.transyt.scraper.APIs.MetaCycAPI;
import pt.uminho.ceb.biosystems.transyt.scraper.APIs.ModelSEEDAPI;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.reactionsGenerator.GenerateTransportReactions;
import pt.uminho.ceb.biosystems.transyt.scraper.tcdb.utilities.ProcessTcdbMetabolitesExcel;
import pt.uminho.ceb.biosystems.transyt.utilities.capsules.ReactionContainer;
......@@ -28,7 +29,7 @@ public class Retriever {
public static void runRetriever(boolean useCache, boolean tests, String accTest) throws Exception {
try {
if(!useCache) {
logger.info("Retrieving TCDB FASTA file...");
......@@ -38,7 +39,7 @@ public class Retriever {
public void run () {
try {
runMetacycSraper();
runMetacycScraper();
}
catch (Exception e) {
logger.error("Fatal error while scraping MetaCyc... Exiting TranSyT...");
......@@ -103,7 +104,7 @@ public class Retriever {
else
tcdbMetabolites = tcdbMetabolitesAux;
Map<String, Map<String, ReactionContainer>> metaCycData = JSONFilesUtils.readMetaCycDataBackupFile();
Map<String, Map<String, ReactionContainer>> metaCycData = JSONFilesUtils.readMetaCycDataBackupFile(getMetacycCpdToModelseed(useCache));
Map<String, Map<String, TcNumberContainer>> transportReactions =
GenerateTransportReactions.generateReactions(data2, metaCycData, tcdbMetabolites, proteinFamilyDescription);
......@@ -128,11 +129,38 @@ public class Retriever {
}
/**
 * Builds a lookup from "CPD..." identifiers to ModelSEED compound identifiers.
 *
 * Each value returned by {@code ModelSEEDAPI.getModelseedCompoundsFromGithu} is split on '|';
 * for every piece containing " CPD", the text between the first "CPD" and the following ';'
 * (or the end of the piece) is taken as the identifier suffix. Only the first "CPD" occurrence
 * of each piece is used. When the same "CPD..." key is produced more than once, the entry
 * written last wins.
 *
 * NOTE(review): presumably the " CPD" pieces are the MetaCyc aliases of the compound - confirm
 * against the ModelSEED aliases format.
 *
 * @param useCache forwarded to {@code ModelSEEDAPI.getModelseedCompoundsFromGithu}
 * @return map of "CPD..." identifier to ModelSEED compound identifier
 * @throws Exception propagated from the ModelSEED compounds retrieval
 */
public static Map<String, String> getMetacycCpdToModelseed(boolean useCache) throws Exception{

	Map<String, String> aliasesByCompound = ModelSEEDAPI.getModelseedCompoundsFromGithu(useCache);

	Map<String, String> cpdToModelseed = new HashMap<>();

	for(Map.Entry<String, String> entry : aliasesByCompound.entrySet()) {

		String modelseedId = entry.getKey();

		for(String alias : entry.getValue().split("\\|")) {

			if(!alias.contains(" CPD"))
				continue;

			String afterPrefix = alias.split("CPD")[1];
			String suffix = afterPrefix.split(";")[0];

			cpdToModelseed.put("CPD" + suffix, modelseedId);
		}
	}

	return cpdToModelseed;
}
/**
*
*/
private static void runMetacycSraper() {
private static void runMetacycScraper() {
logger.info("Retrieving MetaCyc data...");
......@@ -174,9 +202,10 @@ public class Retriever {
Map<String, TcNumberContainer> exceptions = JSONFilesUtils.readJSONExceptionsFile();
for(String key : exceptions.keySet())
for(String key : exceptions.keySet()) {
data.put(key, exceptions.get(key));
}
return data;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment