GitLab CI for Docker builds is enabled! You can set it up using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 09194e03 authored by Davide Lagoa
Browse files

check reactions balance, provide reactions to genes

parent 5bb841d1
package containers;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import files.FilesUtils;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import reactions.IdentifyReactionsMetabolites;
public class BiosynthMetabolites {
// Keys of the metabolites to discard, as returned by FilesUtils.readWordsInFile for the exceptions file.
// NOTE(review): absolute, machine-specific path - consider making the location configurable.
private Set<String> toDelete = FilesUtils.readWordsInFile("C:/Users/Davide/Documents/MetabolitesExceptions.txt");
// Maps handed to the constructor; exceptions() removes the keys listed in toDelete from all of them.
private Map<String, Map<MetaboliteMajorLabel, String>> metabolitesIDs;
private Map<String, String> namesLowerCaseWithoutSigns;
private Map<String, String> namesWithoutSigns;
private Map<String, String> namesLowerCase;
// Bound to this class (previously IdentifyReactionsMetabolites.class) so that log records are attributed correctly.
private static final Logger logger = LoggerFactory.getLogger(BiosynthMetabolites.class);
public BiosynthMetabolites(Map<String, Map<MetaboliteMajorLabel, String>> metabolitesIDs, Map<String, String> namesLowerCaseWithoutSigns,
Map<String, String> namesWithoutSigns, Map<String, String> namesLowerCase) {
......@@ -19,6 +28,7 @@ public class BiosynthMetabolites {
this.namesWithoutSigns = namesWithoutSigns;
this.namesLowerCase = namesLowerCase;
exceptions();
}
/**
......@@ -48,5 +58,23 @@ public class BiosynthMetabolites {
/**
 * Gives access to the {@code namesLowerCase} map held by this container.
 *
 * @return the map stored in the {@code namesLowerCase} field (the same instance, not a copy)
 */
public Map<String, String> getNamesLowerCase() {
    return this.namesLowerCase;
}
/**
 * Deletes the metabolites considered exceptions.
 * <p>
 * Every key listed in {@code toDelete} that is present in {@code metabolitesIDs} is removed
 * from {@code metabolitesIDs} and from the three name maps, and the removal is logged at
 * debug level. Keys that are absent from {@code metabolitesIDs} are ignored.
 * <p>
 * NOTE(review): the same key is used for all four maps - confirm the name maps are keyed
 * by the same identifier as {@code metabolitesIDs}.
 */
private void exceptions() {
    for(String key : toDelete) {
        if(!metabolitesIDs.containsKey(key)) {
            continue;
        }
        metabolitesIDs.remove(key);
        namesLowerCaseWithoutSigns.remove(key);
        namesWithoutSigns.remove(key);
        namesLowerCase.remove(key);
        // Parameterized message: same rendered text, but nothing is formatted when debug is disabled.
        logger.debug("Metabolite {} removed due to order from the configuration file.", key);
    }
}
}
\ No newline at end of file
......@@ -24,6 +24,7 @@ import pt.uminho.sysbio.biosynth.integration.neo4j.BiodbMetaboliteNode;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
import reactions.IdentifyReactionsMetabolites;
import reactions.TransportReactionsBuilder;
import relations.MetabolitesChilds;
import tcdb.capsules.BiosynthMetaboliteProperties;
import tcdb.capsules.TcNumberContainer2;
import triageDatabase.PopulateTriageNeo4jDatabase;
......@@ -51,8 +52,8 @@ public class WriteByMetabolitesID {
logger.info("Retrieving data from Biosynth database...");
// Map<String, BiosynthMetaboliteProperties> data = getBiosynthDBData(service);
Map<String, BiosynthMetaboliteProperties> data = null;
Map<String, BiosynthMetaboliteProperties> data = getBiosynthDBData(service);
// Map<String, BiosynthMetaboliteProperties> data = null;
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
......@@ -109,7 +110,7 @@ public class WriteByMetabolitesID {
try {
/////TRIAGE
Map<String, Set<TcNumberContainer2>> reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData, namesAndIDsContainer, service);
......@@ -118,7 +119,52 @@ public class WriteByMetabolitesID {
Map<String, String[]> forChildsSearch = metabolitesIdentification.getforChildsSearch();
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service).getResults();
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(-1, true, tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data).getResults();
////COUNTS
// MetabolitesChilds.getMetaboliteChilds(-1, "META:Ions", MetaboliteMajorLabel.MetaCyc, service);
// Map<String, Integer> countsMap = new HashMap<>();
// Map<String, Integer> tcCounts = new HashMap<>();
//
// for(String acc : newData.keySet()) {
//
// for(TcNumberContainer2 container : newData.get(acc)) {
//
// if(tcCounts.containsKey(container.getTcNumber())) {
// int val = tcCounts.get(container.getTcNumber()) + 1;
// tcCounts.put(container.getTcNumber(), val);
// }
// else
// tcCounts.put(container.getTcNumber(), 1);
// }
// }
//
// for(String key : tcCounts.keySet())
// System.out.println(key + "\t" + tcCounts.get(key));
//
// for(String acc : newData.keySet()) {
//
// for(TcNumberContainer2 container : newData.get(acc)) {
// countsMap.put(container.getTcNumber(), container.getAllReactionsIds().size());
//
//// if(container.getTcNumber().equals("1.C.52.1.14")) {
////
//// for(int id : container.getAllReactionsIds())
//// System.out.println(container.getReactionContainer(id).getReaction());
////
//// }
//
// }
// }
//
// for(String key : countsMap.keySet())
// System.out.println(key + "\t" + countsMap.get(key));
////////////
// JSONFilesUtils.writeJSONTriageReactions(newData);
......
package reactions;
import java.util.Scanner;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Map;
import java.util.HashMap;
// Source: https://gist.github.com/atomictom/7797647
// Format of formulas: [(](shorthand|longhand)[count][)][count]
//
// shorthand is case sensitive and is the symbol notation (e.g. C for Carbon, Co for Cobalt and Hg for Mercury)
// longhand is the full element name and is not case sensitive (e.g. Carbon, CARBON, CaRBoN, lead)
// If a count is left out, it's implicitly '1', else you can put a count after a shorthand or longhand element
// You can group element with parenthesis and apply counts to the groups. (e.g. C2(HO)2)
// Any other symbols are stripped from the formula (e.g. -, +). Leaving them in does no harm
//
// The output is a map from the capitalized element name as a string, to its count
/**
 * Parses chemical formulas into a map from element name to atom count.
 * <p>
 * Accepted shape: {@code [(](shorthand|longhand)[count][)][count]}. Shorthand is the
 * case-sensitive element symbol (e.g. {@code Co}); longhand is the full element name,
 * matched case-insensitively (e.g. {@code carbon}). A missing count means 1. Elements can
 * be grouped with parentheses, groups may be nested, and a count after the closing
 * parenthesis multiplies the whole group (e.g. {@code C2(HO)2}).
 * <p>
 * Result keys are the long element names from the symbol table (e.g. {@code "Carbon"}).
 * A run of letters that matches no known element is kept verbatim as a key.
 */
public class FormulaParser {

    /** Whitespace separated table of "symbol name" pairs, one element per line. */
    public static String symbols_map =
            "H Hydrogen\n" +
            "He Helium\n" +
            "Li Lithium\n" +
            "Be Beryllium\n" +
            "B Boron\n" +
            "C Carbon\n" +
            "N Nitrogen\n" +
            "O Oxygen\n" +
            "F Fluorine\n" +
            "Ne Neon\n" +
            "Na Sodium\n" +
            "Mg Magnesium\n" +
            "Al Aluminum\n" +
            "Si Silicon\n" +
            "P Phosphorus\n" +
            "S Sulfur\n" +
            "Cl Chlorine\n" +
            "Ar Argon\n" +
            "K Potassium\n" +
            "Ca Calcium\n" +
            "Sc Scandium\n" +
            "Ti Titanium\n" +
            "V Vanadium\n" +
            "Cr Chromium\n" +
            "Mn Manganese\n" +
            "Fe Iron\n" +
            "Co Cobalt\n" +
            "Ni Nickel\n" +
            "Cu Copper\n" +
            "Zn Zinc\n" +
            "Ga Gallium\n" +
            "Ge Germanium\n" +
            "As Arsenic\n" +
            "Se Selenium\n" +
            "Br Bromine\n" +
            "Kr Krypton\n" +
            "Rb Rubidium\n" +
            "Sr Strontium\n" +
            "Y Yttrium\n" +
            "Zr Zirconium\n" +
            "Nb Niobium\n" +
            "Mo Molybdenum\n" +
            "Tc Technetium\n" +
            "Ru Ruthenium\n" +
            "Rh Rhodium\n" +
            "Pd Palladium\n" +
            "Ag Silver\n" +
            "Cd Cadmium\n" +
            "In Indium\n" +
            "Sn Tin\n" +
            "Sb Antimony\n" +
            "Te Tellurium\n" +
            "I Iodine\n" +
            "Xe Xenon\n" +
            "Cs Cesium\n" +
            "Ba Barium\n" +
            "La Lanthanum\n" +
            "Ce Cerium\n" +
            "Pr Praseodymium\n" +
            "Nd Neodymium\n" +
            "Pm Promethium\n" +
            "Sm Samarium\n" +
            "Eu Europium\n" +
            "Gd Gadolinium\n" +
            "Tb Terbium\n" +
            "Dy Dysprosium\n" +
            "Ho Holmium\n" +
            "Er Erbium\n" +
            "Tm Thulium\n" +
            "Yb Ytterbium\n" +
            "Lu Lutetium\n" +
            "Hf Hafnium\n" +
            "Ta Tantalum\n" +
            "W Tungsten\n" +
            "Re Rhenium\n" +
            "Os Osmium\n" +
            "Ir Iridium\n" +
            "Pt Platinum\n" +
            "Au Gold\n" +
            "Hg Mercury\n" +
            "Tl Thallium\n" +
            "Pb Lead\n" +
            "Bi Bismuth\n" +
            "Po Polonium\n" +
            "At Astatine\n" +
            "Rn Radon\n" +
            "Fr Francium\n" +
            "Ra Radium\n" +
            "Ac Actinium\n" +
            "Th Thorium\n" +
            "Pa Protactinium\n" +
            "U Uranium\n" +
            "Np Neptunium\n" +
            "Pu Plutonium\n" +
            "Am Americium\n" +
            "Cm Curium\n" +
            "Bk Berkelium\n" +
            "Cf Californium\n" +
            "Es Einsteinium\n" +
            "Fm Fermium\n" +
            "Md Mendelevium\n" +
            "No Nobelium\n" +
            "Lr Lawrencium\n" +
            "Rf Rutherfordium\n" +
            "Db Dubnium\n" +
            "Sg Seaborgium\n" +
            "Bh Bohrium\n" +
            "Hs Hassium\n" +
            "Mt Meitnerium\n" +
            "Ds Darmstadtium\n" +
            "Rg Roentgenium\n" +
            "Cn Copernicium\n" +
            "Uut Ununtrium\n" +
            "Fl Flerovium\n" +
            "Uup Ununpentium\n" +
            "Lv Livermorium\n" +
            "Uus Ununseptium\n" +
            "Uuo Ununoctium";

    /**
     * Builds the lookup table used to recognise elements.
     *
     * @return a new mutable map holding, for every entry of {@link #symbols_map}, the symbol
     *         mapped to the element name and the upper-cased element name mapped to the element name
     */
    public static Map<String,String> getSymbolMap(){
        Map<String, String> element_map = new HashMap<>();
        try(Scanner in = new Scanner(symbols_map)){
            while(in.hasNext()){
                String shortform = in.next();
                String longform = in.next();
                element_map.put(shortform, longform);
                // Locale.ROOT keeps the keys stable whatever the default locale is (e.g. Turkish dotted I).
                element_map.put(longform.toUpperCase(java.util.Locale.ROOT), longform);
            }
        }
        return element_map;
    }

    /**
     * Adds {@code amount} to the count already stored for {@code element}, or stores it when absent.
     */
    private static void addCount(Map<String,Integer> counts, String element, int amount){
        Integer current = counts.get(element);
        counts.put(element, current == null ? amount : current + amount);
    }

    /**
     * Returns the index of the first non-digit character at or after {@code from}.
     */
    private static int skipDigits(String formula, int from){
        int i = from;
        while(i < formula.length() && Character.isDigit(formula.charAt(i)))
            i++;
        return i;
    }

    /**
     * Reads the count written in {@code formula[start, end)}; an empty or unparsable span counts as 1.
     */
    private static int parseCount(String formula, int start, int end){
        try{
            return Integer.parseInt(formula.substring(start, end));
        }catch(NumberFormatException ignored){
            // No digits (or a value that does not fit an int): the count is implicitly 1.
            return 1;
        }
    }

    /**
     * Returns the index just past the group opened at {@code open}: past its matching ')' and
     * the digits that follow it, or the end of the formula when the group is never closed.
     */
    private static int endOfGroup(String formula, int open){
        int len = formula.length();
        int depth = 0;
        int i = open;
        while(i < len){
            char c = formula.charAt(i++);
            if(c == '(')
                depth++;
            else if(c == ')' && --depth == 0)
                break;
        }
        return skipDigits(formula, i);
    }

    /**
     * Parses an already sanitised formula.
     *
     * @param formula     formula text (expected to hold only letters, digits and parentheses)
     * @param element_map lookup table as produced by {@link #getSymbolMap()}
     * @return element name to total atom count
     */
    private static Map<String,Integer> parseFormulaString(String formula, Map<String,String> element_map){
        Map<String,Integer> element_counts = new HashMap<>();
        int len = formula.length();
        int i = 0;
        while(i < len){
            boolean is_group = false;
            if(formula.charAt(i) == '('){
                i++;
                is_group = true;
            }
            int repeat_count = 1;
            Map<String,Integer> atoms_in_group = new HashMap<>();
            do{
                int iteration_start = i;
                if(i >= len){
                    // Only reachable inside a group: the input ended before ')'. Close the group
                    // implicitly instead of looping forever.
                    System.out.println("Parse error: unmatched parenthesis detected...");
                    break;
                }
                if(formula.charAt(i) == '('){
                    // Nested group (only reachable inside a group, so i >= 1 and the slice is
                    // strictly shorter than the formula): parse it on its own and fold it in.
                    int end = endOfGroup(formula, i);
                    Map<String,Integer> nested = parseFormulaString(formula.substring(i, end), element_map);
                    for(Map.Entry<String,Integer> entry : nested.entrySet())
                        addCount(atoms_in_group, entry.getKey(), entry.getValue());
                    i = end;
                }
                else{
                    int start = i;
                    int restore_i = 0;
                    String element = null;
                    String restore_element = null;
                    // Scan the whole run of letters, remembering the longest prefix that names an element.
                    while(i < len && Character.isLetter(formula.charAt(i))){
                        i++;
                        element = formula.substring(start, i);
                        String element_from_map = element_map.get(element);
                        if(element_from_map == null)
                            element_from_map = element_map.get(element.toUpperCase(java.util.Locale.ROOT));
                        if(element_from_map != null){
                            restore_i = i;
                            restore_element = element_from_map;
                        }
                    }
                    if(restore_element != null){
                        // Rewind to the end of the recognised element; the rest is parsed next.
                        i = restore_i;
                        element = restore_element;
                    }
                    start = i;
                    i = skipDigits(formula, i);
                    int count = parseCount(formula, start, i);
                    // A count with no element in front of it is a parse error: drop it rather
                    // than polluting the result with a null key.
                    if(element != null)
                        addCount(atoms_in_group, element, count);
                }
                if(i < len && formula.charAt(i) == ')'){
                    if(!is_group)
                        System.out.println("Parse error: unmatched parenthesis detected...");
                    i++;
                    is_group = false;
                    int start = i;
                    i = skipDigits(formula, i);
                    repeat_count = parseCount(formula, start, i);
                }
                // Safety net: never stay on the same character twice.
                if(i == iteration_start)
                    i++;
            }while(is_group);
            for(Map.Entry<String,Integer> entry : atoms_in_group.entrySet())
                addCount(element_counts, entry.getKey(), entry.getValue() * repeat_count);
        }
        return element_counts;
    }

    /**
     * Parses a chemical formula.
     * <p>
     * Every character other than ASCII letters, digits and parentheses (charges, dots,
     * spaces, ...) is stripped before parsing.
     *
     * @param formula     the formula to parse; {@code null} yields an empty map
     * @param element_map lookup table as produced by {@link #getSymbolMap()}, or {@code null}
     *                    to build the default one
     * @return a new map from element name to atom count
     */
    public static Map<String,Integer> parse(String formula, Map<String,String> element_map) {
        if(formula == null)
            return new HashMap<>();
        if(element_map == null)
            element_map = getSymbolMap();
        formula = formula.replaceAll("[^a-zA-Z0-9()]", "");
        return parseFormulaString(formula, element_map);
    }
}
\ No newline at end of file
......@@ -5,8 +5,12 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import containers.BiosynthMetabolites;
import dictionary.Synonyms;
import internalDB.WriteByMetabolitesID;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
import tcdb.capsules.BiosynthMetaboliteProperties;
......@@ -26,7 +30,9 @@ public class IdentifyReactionsMetabolites {
private Set<String> metabolites;
private Map<String, String[]> forChildsSearch;
Synonyms dictionary;
private Synonyms dictionary;
private static final Logger logger = LoggerFactory.getLogger(IdentifyReactionsMetabolites.class);
public IdentifyReactionsMetabolites(Map<String, Set<TcNumberContainer2>> reactionsData, BiosynthMetabolites namesAndIDsContainer, BiodbGraphDatabaseService service) {
......@@ -37,16 +43,11 @@ public class IdentifyReactionsMetabolites {
Set<String> tcdbMetabolites = getMetabolitesFromReactions(reactionsData);
System.out.println("TOTAL FOR SEARCH: " + tcdbMetabolites.size());
logger.info("Total metabolites for search: {}", tcdbMetabolites.size());
getMetabolitesIDs(tcdbMetabolites, namesAndIDsContainer, service);
System.out.println("TOTAL FOUND: " + tcdbMetabolitesIDs.size()); //1103
// for(String metabolite : metabolites)
// System.out.println(metabolite);
//
// System.out.println(metabolites.size());
logger.info("Total found: {}", tcdbMetabolitesIDs.size());
}
......@@ -69,44 +70,40 @@ public class IdentifyReactionsMetabolites {
namesAndIDsContainer = standardizationOfNames2(namesAndIDsContainer);
// System.out.println("MET>>>>> " + allMetabolitesByName.get("Electron"));
System.out.println("metabolites >>>" + metabolites.size()); //2078
System.out.println("allmetabolites >>>" + namesAndIDsContainer.getMetabolitesIDs().size()); //154224
//
// System.out.println("metabolites >>>" + metabolites.size()); //2078
//
// System.out.println("allmetabolites >>>" + namesAndIDsContainer.getMetabolitesIDs().size()); //154224
//
identificationByDirectMatch();
System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
// System.out.println("FOUND1: " + tcdbMetabolitesIDs.size()); //911
identificationDeletingStoichiometry();
System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
// System.out.println("FOUND2: " + tcdbMetabolitesIDs.size()); //975
identificationInLowerCase();
System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089
// System.out.println("FOUND3: " + tcdbMetabolitesIDs.size()); //1089
identificationReplacingNonAlphanumeric();
System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
// System.out.println("FOUND4: " + tcdbMetabolitesIDs.size()); //1095
identificationIntroducingDandL();
System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103
// System.out.println("FOUND5: " + tcdbMetabolitesIDs.size()); //1103
identificationReplacingNonAlphanumericAndInLowercase();
System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
// System.out.println("FOUND6: " + tcdbMetabolitesIDs.size()); //1109
return tcdbMetabolitesIDs;
}
......@@ -176,8 +173,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.replaceAll("[^A-Za-z0-9]", "").toLowerCase());
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
else if(String.valueOf(metabolite.charAt(metabolite.length()-1)).equals(s)) {
......@@ -185,9 +180,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.substring(0, metabolite.length()-1).replaceAll("[^A-Za-z0-9]", ""));
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
}
......@@ -205,8 +197,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.replaceAll("[^A-Za-z0-9]", ""));
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
else if(String.valueOf(metabolite.charAt(metabolite.length()-1)).equals(s)) {
......@@ -214,8 +204,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.substring(0, metabolite.length()-1).replaceAll("[^A-Za-z0-9]", ""));
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
}
......@@ -233,8 +221,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.toLowerCase());
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
else if(String.valueOf(metabolite.charAt(metabolite.length()-1)).equals(s)) {
......@@ -243,8 +229,6 @@ public class IdentifyReactionsMetabolites {
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(metabolite.substring(0, metabolite.length()-1).toLowerCase());
// System.out.println(metabolite + ids);
saveMetabolite(metabolite, ids);
}
}
......@@ -277,9 +261,6 @@ public class IdentifyReactionsMetabolites {
*/
private void identificationDeletingStoichiometryAux(String metabolite, boolean correctName) {
// if(metabolite.equals("nCo2+"))
// System.out.println("metaboliteFound!!!");
String metabolite2 = metabolite;
if(correctName)
......@@ -299,24 +280,14 @@ public class IdentifyReactionsMetabolites {
met = met.replaceAll(REGEX_STOICHIOMETRY[i], "");
// if(metabolite.equals("nCo2+"))
// System.out.println("met >>>> " + met);
if(allMetabolitesByName.containsKey(met)) {
key = met;
}
// if(metabolite.equals("nCo2+"))
// System.out.println("key >>>> " + key);
if(key == null ) {
String alias = dictionary.getSynonym(met.replace("\\s+", "").toLowerCase());
// if(metabolite.equals("nCo2+"))
// System.out.println("alias >>>> " + alias);
if(alias != null) {
if(allMetabolitesByName.containsKey(alias))
......@@ -327,20 +298,15 @@ public class IdentifyReactionsMetabolites {
if(key != null) {
// if(metabolite.equals("nCo2+"))
// System.out.println("ids >>>> " + allMetabolitesByName.get(key));
Map<MetaboliteMajorLabel, String> ids = allMetabolitesByName.get(key);
// System.out.println(metabolite + ids);
saveMetabolite(metabolite2, ids);
}
}
catch (Exception e) {
System.out.println("[ERROR] while processing metabolite: " + metabolite);