GitLab CI for Docker builds is enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

new strategy to find blast perfect matches. integrate BITs new options

parent 6b333d62
2020-07-21 19:29:16 jdbc[35]: exception
org.h2.jdbc.JdbcSQLSyntaxErrorException: Schema "UPDATEDBIGGDATABASE" not found; SQL statement:
DROP SCHEMA updatedbiggdatabase CASCADE; [90079-200]
at org.h2.message.DbException.getJdbcSQLException(DbException.java:576)
at org.h2.message.DbException.getJdbcSQLException(DbException.java:429)
at org.h2.message.DbException.get(DbException.java:205)
at org.h2.message.DbException.get(DbException.java:181)
at org.h2.command.ddl.DropSchema.update(DropSchema.java:46)
at org.h2.command.CommandContainer.update(CommandContainer.java:198)
at org.h2.command.Command.executeUpdate(Command.java:251)
at org.h2.jdbc.JdbcStatement.executeInternal(JdbcStatement.java:228)
at org.h2.jdbc.JdbcStatement.execute(JdbcStatement.java:201)
at pt.uminho.ceb.biosystems.merlin.dataAccess.H2DatabaseAccess.dropDatabase(H2DatabaseAccess.java:243)
at pt.uminho.ceb.biosystems.merlin.dataAccess.InitDataAccess.dropDatabase(InitDataAccess.java:170)
at pt.uminho.ceb.biosystems.merlin.services.DatabaseServices.dropDatabase(DatabaseServices.java:107)
at pt.uminho.ceb.biosystems.bigg.metabolic.loader.blast.ProvideBiggReactionsToGenes.loadDatabase(ProvideBiggReactionsToGenes.java:594)
at pt.uminho.ceb.biosystems.bigg.metabolic.loader.blast.ProvideBiggReactionsToGenes.<init>(ProvideBiggReactionsToGenes.java:80)
at pt.uminho.ceb.biosystems.bigg.metabolic.loader.blast.BiggMain.main(BiggMain.java:70)
......@@ -20,8 +20,8 @@ public class BiggMain {
public static void main(String[] args) throws Exception {
//String workFolderID = "/workdir/";
String workFolderID = "C:/Users/Asus/Desktop/"; //Docker (/workdir/)
String workFolderID = "/workdir/";
// String workFolderID = "C:/Users/Asus/Desktop/"; //Docker (/workdir/)
String paramsPath = workFolderID.concat("workerSubmissions/params.txt");
Logger logger = Logger.getLogger("BiggMain");
......@@ -43,6 +43,9 @@ public class BiggMain {
String filePath = workFolderID.concat("workerSubmissions/protein.faa");
Boolean includeReactionsWithoutGPRBigg = Boolean.parseBoolean(listArgs.get(5));
Boolean includeReactionsWithoutGPR = Boolean.parseBoolean(listArgs.get(6));;
int option = Integer.parseInt(listArgs.get(0));
logger.info("Option = " + option);
......@@ -64,7 +67,7 @@ public class BiggMain {
}
logger.info("Starting ProvideBiggReactionsToGenes");
new ProvideBiggReactionsToGenes(workFolderID, filePath, properties, option, items);
new ProvideBiggReactionsToGenes(workFolderID, filePath, properties, option, items, includeReactionsWithoutGPRBigg, includeReactionsWithoutGPR);
File unzippedResults = new File(workFolderID+"Bigg_Files/Results/");
......@@ -124,6 +127,8 @@ public class BiggMain {
for(String model:modelFilesForBlast)
models = models+model+";";
System.out.println("Selected models: " + models);
return models;
}
......
......@@ -2,10 +2,13 @@ package pt.uminho.ceb.biosystems.bigg.metabolic.loader.blast;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Observable;
import java.util.Observer;
import java.util.Set;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
......@@ -119,7 +122,9 @@ public class Blast implements Observer{
= run_similaritySearch.runBBBlastHits(queryFilePath,subjectFastaFile,
false, this.evalue,this.bitScore,this.queryCovergage,this.queryCovergage);
Map<String,Set<String>> queryTargetGenesMap = AlignmentsUtils.processBidirectionalBestHits(bbHits);
Map<String,Set<String>> queryTargetGenesMap = processBidirectionalBestHits(bbHits);
results = queryTargetGenesMap;
}
......@@ -175,6 +180,100 @@ public class Blast implements Observer{
/**
 * Returns the {@code results} map held by this instance.
 *
 * @return the stored results map; in the bidirectional-best-hits branch visible in this
 *         class it is the query/target genes map produced by
 *         {@code processBidirectionalBestHits}
 */
public Map<String,Set<String>> getResults() {
return results;
}
/**
 * Computes the bidirectional best hits between the query and the subject genome, giving
 * priority to perfect matches. Adapted from the method of the same name in
 * {@code AlignmentsUtils}.
 * <p>
 * The direct (A) and reverse (B) hits are first converted into gene maps by
 * {@code AlignmentsUtils.getOrthologsGenesMap}. Then, for every direct hit that is a
 * perfect match (see {@link #isPerfectMatch(AlignmentContainer)}) and that is confirmed by
 * a reverse hit pointing back to the same query, both gene maps are narrowed so that the
 * pair is the only candidate on each side. Finally the two maps are intersected with
 * {@code AlignmentsUtils.intersectBidirectionalBestHitsMaps}.
 * <p>
 * A perfect direct hit without a confirming reverse hit is ignored (both maps are left
 * untouched for it); previously that situation raised a {@link NullPointerException}.
 *
 * @param bbHits pair holding the direct BLAST hits ({@code getA()}) and the reverse BLAST
 *               hits ({@code getB()})
 * @return the map returned by {@code AlignmentsUtils.intersectBidirectionalBestHitsMaps}
 *         for the (possibly narrowed) direct and reverse gene maps
 */
private static Map<String, Set<String>> processBidirectionalBestHits(Pair<ConcurrentLinkedQueue<AlignmentContainer>,ConcurrentLinkedQueue<AlignmentContainer>> bbHits){

    Map<String, Set<String>> queryGenomeOrthologsMap = AlignmentsUtils.getOrthologsGenesMap(bbHits.getA());
    Map<String, Set<String>> subjectGenomeOrthologsMap = AlignmentsUtils.getOrthologsGenesMap(bbHits.getB());

    Map<String, Set<AlignmentContainer>> directHitsByQuery = groupByQuery(bbHits.getA());
    Map<String, Set<AlignmentContainer>> reverseHitsByQuery = groupByQuery(bbHits.getB());

    for(Entry<String,Set<AlignmentContainer>> entry : directHitsByQuery.entrySet()) {

        for(AlignmentContainer container : entry.getValue()) {

            if(!isPerfectMatch(container))
                continue;

            // Look for the reverse hit (subject -> query) that confirms this perfect match.
            // The subject may have no reverse hits at all, hence the null check.
            AlignmentContainer reciprocal = null;
            Set<AlignmentContainer> reverseHits = reverseHitsByQuery.get(container.getTarget());

            if(reverseHits != null) {
                for(AlignmentContainer reverseHit : reverseHits) {
                    if(reverseHit.getTarget().equals(container.getQuery()))
                        reciprocal = reverseHit;
                }
            }

            // Without a confirming reverse hit there is nothing to pin; leave both maps
            // as they are so the regular intersection decides.
            if(reciprocal == null)
                continue;

            // Edit the direct blast map to keep only the target that corresponds 100%
            Set<String> onlyTarget = new HashSet<String>();
            onlyTarget.add(container.getTarget());
            queryGenomeOrthologsMap.put(entry.getKey(), onlyTarget);

            // Edit the reverse blast maps to keep only the hit that corresponds 100%
            Set<AlignmentContainer> onlyReciprocal = new HashSet<AlignmentContainer>();
            onlyReciprocal.add(reciprocal);
            reverseHitsByQuery.put(container.getTarget(), onlyReciprocal);

            Set<String> onlyQuery = new HashSet<String>();
            onlyQuery.add(reciprocal.getTarget());
            subjectGenomeOrthologsMap.put(container.getTarget(), onlyQuery);
        }
    }

    return AlignmentsUtils.intersectBidirectionalBestHitsMaps(queryGenomeOrthologsMap,subjectGenomeOrthologsMap);
}

/**
 * Groups alignment containers by the value of {@code getQuery()}.
 *
 * @param hits the alignments to group
 * @return map from query identifier to the set of alignments having that query
 */
private static Map<String, Set<AlignmentContainer>> groupByQuery(ConcurrentLinkedQueue<AlignmentContainer> hits) {

    Map<String, Set<AlignmentContainer>> byQuery = new HashMap<String, Set<AlignmentContainer>>();

    for(AlignmentContainer hit : hits)
        byQuery.computeIfAbsent(hit.getQuery(), key -> new HashSet<AlignmentContainer>()).add(hit);

    return byQuery;
}

/**
 * Tells whether an alignment is a perfect match: the integer ratio of the alignment length
 * to both the query and the target length is 1, and the number of identical positions
 * equals both lengths.
 *
 * @param hit the alignment to check
 * @return {@code true} for a perfect match; {@code false} otherwise, including when either
 *         sequence length is not positive (which would otherwise divide by zero)
 */
private static boolean isPerfectMatch(AlignmentContainer hit) {

    int queryLength = hit.getQueryLength();
    int targetLength = hit.getTargetLength();

    if(queryLength <= 0 || targetLength <= 0)
        return false;

    // Integer division is kept on purpose: it reproduces the original coverage check.
    int queryCoverage = hit.getAlignmentLength() / queryLength;
    int subjectCoverage = hit.getAlignmentLength() / targetLength;
    int identity = hit.getNumIdenticals();

    return queryCoverage == 1 && subjectCoverage == 1
            && identity == queryLength && identity == targetLength;
}
}
......
......@@ -19,7 +19,7 @@ public class Properties {
private static final Logger logger = LoggerFactory.getLogger(Properties.class);
public Properties() {
// String propertiesFilePath = "C:/Users/Asus/Desktop/configs/configurations.txt";//BiggUtils.getRootDirectory() + "configs/configurations.txt"; //Docker - BiggUtils.getRootDirectory() + "configs/"
// String propertiesFilePath = "C:/Users/Asus/Desktop/configs/configurations.txt";
String propertiesFilePath = BiggUtils.getRootDirectory() + "configs/configurations.txt";
allProperties = FileUtils.readPropertiesFile(false,propertiesFilePath);
......
......@@ -24,6 +24,10 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.lang.StringUtils;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.BiggDataCenter;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.BiggModels;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.BiggReactionResult;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.BiggReactions;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.UpdateBiggFiles;
import pt.uminho.ceb.biosystems.merlin.core.containers.model.ReactionContainer;
import pt.uminho.ceb.biosystems.merlin.services.DatabaseServices;
import pt.uminho.ceb.biosystems.merlin.services.model.ModelReactionsServices;
......@@ -38,6 +42,11 @@ public class ProvideBiggReactionsToGenes {
private String workFolderID;
private Map<String,Set<String>> queryGeneBiggGeneMap;
private String workspaceName = "updatedBiggDatabase";
private Boolean includeReactionsWithoutGPRBigg = false;
private Boolean includeReactionsWithoutGPR = false;
int option;
String models;
List<String> inModelGenesTeste = new ArrayList<String>();
/**
*
......@@ -47,15 +56,19 @@ public class ProvideBiggReactionsToGenes {
* @param option
* @param items
*/
public ProvideBiggReactionsToGenes(String workFolderID, String queryPathParameters, Properties properties, int option, String items) {
public ProvideBiggReactionsToGenes(String workFolderID, String queryPathParameters, Properties properties, int option, String items, Boolean includeReactionsWithoutGPRBigg, Boolean includeReactionsWithoutGPR) {
this.queryPath = queryPathParameters;
this.workFolderID = workFolderID;
this.includeReactionsWithoutGPR = includeReactionsWithoutGPR;
this.includeReactionsWithoutGPRBigg = includeReactionsWithoutGPRBigg;
this.option = option;
this.models = items;
try {
Blast blast = new Blast(workFolderID, queryPath, properties, option, items);
blastResults = blast.getResults();
blastResults = blast.getResults();
getReactionIds(properties, option);
getReactionIds(properties, option, items);
gprsMap = createGPRs();
loadDatabase();
......@@ -68,27 +81,26 @@ public class ProvideBiggReactionsToGenes {
}
}
private void getReactionIds(Properties properties, int option) throws IOException {
private void getReactionIds(Properties properties, int option, String items) throws IOException {
try {
switch (option) {
case 1:
sequenceIdsGenesRelation = "sequenceIdsGenesRelation.txt";
seqIdReactionRelation = BiggDataCenter.readSeqIDReactionRelationMap(workFolderID);
break;
case 2:
case 3:
case 4:
sequenceIdsGenesRelation = "sequenceIdsGenesRelationTemp.txt";
seqIdReactionRelation = BiggDataCenter.seqIDReactionRelationMap(sequenceIdsGenesRelation, workFolderID, items);
break;
}
seqIdReactionRelation = BiggDataCenter.seqIDReactionRelationMap(sequenceIdsGenesRelation, workFolderID);
}
catch(Exception ex) {
......@@ -101,6 +113,8 @@ public class ProvideBiggReactionsToGenes {
private Map<String,String> createGPRs() throws Exception {
List<String> inModelReactions = new ArrayList<String>();
queryGeneBiggGeneMap = BiggDataCenter.biggGeneToQueryGeneRetriever(sequenceIdsGenesRelation,workFolderID,blastResults); //get bigg genes associated with the query gene
Map<String,String> parsedRules = booleanRulesParser(BiggDataCenter.readGenesReactionsRuleFile(workFolderID));
......@@ -111,28 +125,31 @@ public class ProvideBiggReactionsToGenes {
for(Entry<String,Set<String>> entry : blastResults.entrySet()) {
for(String gene : entry.getValue()) {
List<String> reactions = seqIdReactionRelation.get(gene);
for(String reaction : reactions) {
resultsRules.put(reaction, parsedRules.get(reaction));
if(!inModelReactions.contains(reaction))
inModelReactions.add(reaction);
if(parsedRules.get(reaction) != null)
resultsRules.put(reaction, parsedRules.get(reaction));
}
}
}
}
for(Entry<String,String> parsedRule : resultsRules.entrySet()) {
Set<String> queryGenes = new HashSet<String>();
String reaction = parsedRule.getKey();
String rule = parsedRule.getValue();
//If the rule contains OR, split the rule into a list, in order to validate each option
if(rule.contains(" or ")) {
List<String> splittedByOr = new ArrayList<String>();
splittedByOr = Arrays.asList(rule.split("\\s+or\\s+"));
......@@ -223,6 +240,7 @@ public class ProvideBiggReactionsToGenes {
}
subRule = subRuleAnd;
}else {
subRule = idGenesMap.get(subRule);
}
......@@ -242,8 +260,6 @@ public class ProvideBiggReactionsToGenes {
finalRulesParsed.put(entry.getKey(), finalRuleString);
}
System.out.println("FINALE");
writeGPRsFile(finalRulesParsed);
return finalRulesParsed;
......@@ -262,8 +278,6 @@ public class ProvideBiggReactionsToGenes {
FileOutputStream fos = new FileOutputStream(fout);
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos));
for(Entry<String, String> entry : finalRulesParsed.entrySet()) {
writer.write(entry.getKey() + " - " + entry.getValue());
......@@ -271,6 +285,7 @@ public class ProvideBiggReactionsToGenes {
}
writer.close();
}
/**
......@@ -501,10 +516,15 @@ public class ProvideBiggReactionsToGenes {
return executeDistributions(set1, set2, cycle);
}
private void loadDatabase() throws IOException {
private void loadDatabase() throws Exception {
try {
File h2DatabaseFile = new File(workFolderID + "h2Database/");
if(h2DatabaseFile.exists())
org.apache.commons.io.FileUtils.deleteDirectory(h2DatabaseFile);
AtomicBoolean cancel = new AtomicBoolean(false);
boolean verify = false;
......@@ -554,14 +574,16 @@ public class ProvideBiggReactionsToGenes {
org.apache.commons.io.FileUtils.deleteDirectory(tablesPath);
File biggDatabaseDump = new File(workFolderID + "Bigg_Files/BiggDatabaseDump/");
org.apache.commons.io.FileUtils.deleteDirectory(biggDatabaseDump);
DatabaseServices.dropDatabase(workspaceName);
DatabaseServices.dropConnection(workspaceName);
}catch(Exception ex) {
ex.printStackTrace();
}finally {
DatabaseServices.dropDatabase(workspaceName);
DatabaseServices.dropConnection(workspaceName);
File biggDatabaseDump = new File(workFolderID + "Bigg_Files/BiggDatabaseDump/");
if(biggDatabaseDump.exists())
org.apache.commons.io.FileUtils.deleteDirectory(biggDatabaseDump);
......@@ -570,7 +592,6 @@ public class ProvideBiggReactionsToGenes {
if(tablesPath.exists())
org.apache.commons.io.FileUtils.deleteDirectory(tablesPath);
File h2DatabaseFile = new File(workFolderID + "h2Database/");
if(h2DatabaseFile.exists())
org.apache.commons.io.FileUtils.deleteDirectory(h2DatabaseFile);
......@@ -584,6 +605,7 @@ public class ProvideBiggReactionsToGenes {
private void updateDatabase() {
try {
System.out.println("Starting update");
//Update reactions
......@@ -601,7 +623,44 @@ public class ProvideBiggReactionsToGenes {
System.out.println("mapReactionByString is filled");
//Force update for reactions not associated with genome in BiGG Models
if(includeReactionsWithoutGPRBigg && option != 1) {
List<String> modelList = Arrays.asList(models.split(";"));
modelList.replaceAll(String::trim);
for(String model : modelList) {
BiggModels current = UpdateBiggFiles.readSpecificModelDetails(model);
List<BiggReactions> reactions = current.getReactions();
for (BiggReactions reaction : reactions) {
List<BiggReactionResult> reactionResults = reaction.getResults();
for(BiggReactionResult result : reactionResults) {
if((result.getGeneReactionRule().equals(null) || result.getGeneReactionRule().equals(""))) {
if(result.getExportedReactionId().toLowerCase().contains("biomass"))
continue;
int reactionId = mapReactionByString.get(result.getExportedReactionId());
ModelReactionsServices.updateModelReactionInModelByReactionId(workspaceName, reactionId, true);
}
}
}
}
}
List<String> names = new ArrayList<>();
for(Entry<String,Set<String>> entry : blastResults.entrySet()) {
......@@ -609,13 +668,17 @@ public class ProvideBiggReactionsToGenes {
for(String gene : entry.getValue()) {
List<String> reactions = seqIdReactionRelation.get(gene);
for(String reaction : reactions) {
if(!mapReactionByString.containsKey(reaction))
if(!mapReactionByString.containsKey(reaction)) {
continue;
}
if(!includeReactionsWithoutGPR) {
if(!gprsMap.containsKey(reaction))
continue;
}
if(!names.contains(reaction))
names.add(reaction);
......@@ -623,22 +686,47 @@ public class ProvideBiggReactionsToGenes {
int reactionId = mapReactionByString.get(reaction);
ModelReactionsServices.updateModelReactionInModelByReactionId(workspaceName, reactionId, true);
}
}
}
List<String> inModel = new ArrayList<String>();
for(Entry<String,Set<String>> entry : blastResults.entrySet()) {
for(String gene : entry.getValue()) {
List<String> reactions = seqIdReactionRelation.get(gene);
for(String reaction : reactions) {
if(!mapReactionByString.containsKey(reaction))
continue;
int reactionId = mapReactionByString.get(reaction);
ReactionContainer cont = ModelReactionsServices.getReaction(workspaceName, reactionId);
if(cont.isInModel())
if(!inModel.contains(cont.getExternalIdentifier()))
inModel.add(cont.getExternalIdentifier());
}
}
}
//Update GPRs
//Update GPRs
System.out.println("Starting Update GPRs");
Map<String,String> rules = new HashMap<>();
for (Entry<String, String> gpr : gprsMap.entrySet()) {
rules.put(gpr.getKey(), gpr.getValue());
}
System.out.println("Sending the updated rules to the DB");
......@@ -654,7 +742,7 @@ public class ProvideBiggReactionsToGenes {
}
/**
* This method verifies the key in the md5 file and checks whether the results were corrupted or not.
* @return boolean informing whether the results were corrupted or not.
......
......@@ -9,6 +9,7 @@ import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
......@@ -19,6 +20,11 @@ import java.util.Map.Entry;
import pt.uminho.ceb.biosystems.bigg.metabolic.loader.data.BiggUtils;
import org.apache.commons.io.IOUtils;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.RNATools;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.SymbolList;
import org.json.JSONArray;
import org.json.JSONObject;
......@@ -71,21 +77,50 @@ public class BiggDataCenter {
else
addedGenes.add(gene.getBiggId());
if(!addedSequences.contains(gene.getProteinSequence())){
String sequence = "";
if(gene.getProteinSequence() != null && gene.getProteinSequence() != ""){
sequence = gene.getProteinSequence();
}else {
if(gene.getDnaSequence() != null && gene.getDnaSequence() != "") {
try {
//create a DNA SymbolList
SymbolList symL = DNATools.createDNA(gene.getDnaSequence());
//transcribe to RNA
symL = DNATools.toRNA(symL);
//translate to protein
symL = RNATools.translate(symL);
sequence = symL.seqString();
sequence = sequence.replace("*", "");
}catch (IllegalAlphabetException ex) {
System.out.println(ex.getMessage());
}catch (IllegalSymbolException ex) {
System.out.println(ex.getMessage());
}catch (IllegalArgumentException ex) {
System.out.println(ex.getMessage());
}
}
}
if(!addedSequences.contains(sequence)){
//first time encounter
addedSequences.add(gene.getProteinSequence());
String formatted = "g"+String.format("%05d", idNr);
addedSequences.add(sequence);
String formatted = "g"+String.format("%09d", idNr);
currentSeqId = formatted;
sequenceIdsProteinSequence.put(gene.getProteinSequence(),formatted );
sequenceIdsProteinSequence.put(sequence,formatted );
sequenceIds.put(formatted, gene.getBiggId());
writeFastaFile(writer, gene.getProteinSequence(), currentSeqId);
writeFastaFile(writer, sequence, currentSeqId);
idNr++;
}else{
//already exists - writes in another file (repeatedSequences)
String seqId = sequenceIdsProteinSequence.get(gene.getProteinSequence());
String seqId = sequenceIdsProteinSequence.get(sequence);
if(!repeatedSequences.contains(gene)) {
repeatedSequences.add(gene);
......@@ -125,6 +160,126 @@ public class BiggDataCenter {
writer.println(proteinSeq);
}
//Models reactions
/**
 * Converts the "reaction" JSON object of a model into a list of {@link BiggReactions}.
 * <p>
 * For every BiGG identifier returned by {@code BiggUtils.getBiggIds} the matching JSON
 * object is read and its name, identifier, pseudo-reaction flag, old identifiers,
 * database links, metabolites, Escher maps and results are copied into a new
 * {@link BiggReactions}.
 *
 * @param allReactionsDetails JSON object keyed by reaction BiGG identifier
 * @return one {@link BiggReactions} per identifier, in the order given by
 *         {@code BiggUtils.getBiggIds}
 */
private static List<BiggReactions> readModelReactions(JSONObject allReactionsDetails) {

    List<BiggReactions> modelReactionsList = new ArrayList<BiggReactions>();

    for(String biggId : BiggUtils.getBiggIds(allReactionsDetails)) {

        JSONObject reactionDetails = allReactionsDetails.getJSONObject(biggId);
        BiggReactions reaction = new BiggReactions();

        //Name
        reaction.setName(BiggUtils.validateString("name", reactionDetails));

        //Bigg Ids
        reaction.setBiggId(BiggUtils.validateString("bigg_id", reactionDetails));

        //Pseudo-reaction
        reaction.setPseudoreaction(reactionDetails.getBoolean("pseudoreaction"));

        //Old identifiers
        reaction.setOldIdentifiers(BiggUtils.validateStringArray("old_identifiers", reactionDetails));

        //Db Links
        reaction.setDbLink(BiggUtils.getDbLinks(reactionDetails));

        //Metabolites
        JSONArray metabolitesArray = reactionDetails.getJSONArray("metabolites");
        List<BiggMetabolites> metabolitesList = new ArrayList<BiggMetabolites>();

        for (int i = 0; i < metabolitesArray.length(); i++) {

            JSONObject metaboliteObj = metabolitesArray.getJSONObject(i);
            BiggMetabolites newMetabolite = new BiggMetabolites();

            newMetabolite.setBiggId(BiggUtils.validateString("bigg_id", metaboliteObj));
            newMetabolite.setName(BiggUtils.validateString("name", metaboliteObj));
            newMetabolite.setCompartmentBiggId(BiggUtils.validateString("compartment_bigg_id", metaboliteObj));

            // getDouble accepts any JSON number (integer or decimal), so the value no longer
            // has to be probed with casts that fail for Long/BigDecimal representations.
            newMetabolite.setStoichiometry((float) metaboliteObj.getDouble("stoichiometry"));

            metabolitesList.add(newMetabolite);
        }

        reaction.setMetabolites(metabolitesList);

        //Escher Map
        reaction.setEscher_maps(BiggUtils.getEscherMaps(reactionDetails));

        //Result
        reaction.setResults(readReactionResults(reactionDetails.getJSONArray("results")));

        modelReactionsList.add(reaction);
    }

    return modelReactionsList;
}
/**
 * Converts the "results" JSON array of a reaction into a list of
 * {@link BiggReactionResult}.
 * <p>
 * Each element contributes its bounds, objective coefficient, copy number, gene reaction
 * rule, subsystem, reaction string, exported reaction identifier and genes. A gene whose
 * "name" is JSON null or absent gets an empty name.
 *
 * @param results JSON array with one object per reaction result
 * @return one {@link BiggReactionResult} per array element, in array order
 */
private static List<BiggReactionResult> readReactionResults(JSONArray results) {

    List<BiggReactionResult> reactionResultsList = new ArrayList<BiggReactionResult>();

    for (int i = 0; i < results.length(); i++)
    {
        JSONObject currentResult = results.getJSONObject(i);
        BiggReactionResult newResult = new BiggReactionResult();

        // NOTE(review): numeric fields are read with getInt, as before; confirm that the
        // source data never carries fractional bounds or coefficients.
        newResult.setUpperBound(currentResult.getInt("upper_bound"));
        newResult.setLowerBound(currentResult.getInt("lower_bound"));
        newResult.setObjectiveCoefficient(currentResult.getInt("objective_coefficient"));
        newResult.setCopyNr(currentResult.getInt("copy_number"));
        newResult.setGeneReactionRule(BiggUtils.validateString("gene_reaction_rule", currentResult));
        newResult.setSubsystem(BiggUtils.validateString("subsystem", currentResult));
        newResult.setReactionString(BiggUtils.validateString("reaction_string", currentResult));
        newResult.setExportedReactionId(BiggUtils.validateString("exported_reaction_id", currentResult));

        JSONArray resultGenes = currentResult.getJSONArray("genes");
        List<BiggGenes> genesList = new ArrayList<BiggGenes>();

        for (int j = 0; j < resultGenes.length(); j++)
        {
            JSONObject currentGene = resultGenes.getJSONObject(j);
            BiggGenes gene = new BiggGenes();

            // isNull covers both an explicit JSON null and a missing "name" key.
            if(currentGene.isNull("name"))
                gene.setName("");
            else
                gene.setName(currentGene.getString("name"));

            gene.setBiggId(currentGene.getString("bigg_id"));
            genesList.add(gene);
        }

        newResult.setGenes(genesList);
        reactionResultsList.add(newResult);
    }

    return reactionResultsList;
}
/**
* Models genes
......@@ -206,11 +361,11 @@ public class BiggDataCenter {
* @return
* @throws IOException
*/
public static Map<String, List<String>> seqIDReactionRelationMap(String seqIdsGenesRelationFile, String dockerPath) throws IOException {
public static Map<String, List<String>> seqIDReactionRelationMap(String seqIdsGenesRelationFile, String dockerPath, String models) throws IOException {
Map<String,List<String>> seqIdReaction = new HashMap<String, List<String>>();
Map<String,String> seqIdsGenesRelation = readSeqIDGenesRelationFile(seqIdsGenesRelationFile, dockerPath);
HashMap<String,List<String>> geneReactionRelation = readGeneReactionRelationFile(dockerPath);
HashMap<String,List<String>> geneReactionRelation = geneReactionRelationFileTemp(models, dockerPath);
for(Map.Entry<String, String> entry : seqIdsGenesRelation.entrySet()) {
......@@ -219,7 +374,7 @@ public class BiggDataCenter {
String biggIdsJoined = seqIdsGenesRelation.get(entry.getKey());
List<String> biggIdsSplitted = Arrays.asList(biggIdsJoined.split(";"));
biggIdsSplitted.replaceAll(String::trim);
for(String geneBiggId : biggIdsSplitted) {
......@@ -245,6 +400,171 @@ public class BiggDataCenter {
}
/**
 * Builds the gene-to-reactions relation for a selection of models.
 * <p>
 * For every model in {@code models} the file
 * {@code <workFolderID>Bigg_Files/BiggData/<model>.txt} is parsed, and each gene of the
 * model is associated with the exported reaction identifiers of the model reactions whose
 * BiGG identifier matches one of the gene's reactions. When a gene appears in several
 * models, reactions not yet associated with it are appended.
 * <p>
 * Processing stops at the first model whose file does not exist; the relations gathered
 * from the previous models are still returned. If reading a file fails with an
 * {@link IOException}, the stack trace is printed and an empty map is returned.
 *
 * @param models      model identifiers separated by ';' (surrounding whitespace is trimmed)
 * @param workFolderID working folder path, expected to end with a path separator
 * @return map from gene BiGG identifier to the exported reaction identifiers related to it
 */
private static HashMap<String, List<String>> geneReactionRelationFileTemp(String models, String workFolderID) {

    HashMap<String,List<String>> geneReactionMap = new HashMap<String,List<String>>();

    // Relations are gathered here and only published once every model was read, so a
    // read failure still yields an empty result, as it always did.
    HashMap<String,List<String>> gathered = new HashMap<String,List<String>>();

    List<String> modelList = Arrays.asList(models.split(";"));
    modelList.replaceAll(String::trim);

    try {
        for (String model : modelList) {

            String modelPath = workFolderID+"Bigg_Files/BiggData/"+model+".txt";

            if(!new File(modelPath).exists()) {
                System.out.println("An error occurred: Model " + model + " was not found.");
                break;
            }

            String file = BiggUtils.readFile(modelPath, StandardCharsets.UTF_8);

            System.out.println(model);

            JSONObject modelDetails = new JSONObject(file).getJSONObject(model);

            BiggModels biggModel = new BiggModels();

            //model genes
            biggModel.setGenes(readModelGenes(modelDetails.getJSONObject("gene")));
            //model reactions
            biggModel.setReactions(readModelReactions(modelDetails.getJSONObject("reaction")));

            List<BiggReactions> biggReactions = biggModel.getReactions();

            for(BiggGenes gene : biggModel.getGenes()) {

                // Exported ids of every model reaction matching one of the gene's reactions.
                List<String> listReactionsBiggIds = new ArrayList<String>();

                for(BiggReactions reaction : gene.getReaction()) {
                    for(BiggReactions reac : biggReactions) {
                        if(reac.getBiggId().equals(reaction.getBiggId())) {
                            for(BiggReactionResult result : reac.getResults()) {
                                listReactionsBiggIds.add(result.getExportedReactionId());
                            }
                        }
                    }
                }

                List<String> currentReactions = gathered.get(gene.getBiggId());

                if(currentReactions == null) {
                    gathered.put(gene.getBiggId(), listReactionsBiggIds);
                }else {
                    // Gene already seen in a previous model: append only the new reactions.
                    for(String reaction : listReactionsBiggIds) {
                        if(!currentReactions.contains(reaction))
                            currentReactions.add(reaction);
                    }
                }
            }
        }

        geneReactionMap.putAll(gathered);

    } catch (IOException e) {
        e.printStackTrace();
    }

    return geneReactionMap;
}
/**
 * Reads {@code <dockerPath>Bigg_Files/Results/seqIdsReactionsRelation.txt} into a map.
 * <p>
 * Each useful line has the form {@code <sequence id> - <reaction> <reaction> ...}: the text
 * before the first '-' is the key and the text after it is split on whitespace into the
 * list of reactions, so reaction identifiers may themselves contain hyphens. Lines without
 * a '-' or with an empty key (for instance blank lines) are skipped.
 * <p>
 * An {@link IOException} raised while opening or reading the file is printed and the
 * entries read so far are returned.
 *
 * @param dockerPath working folder path, expected to end with a path separator
 * @return map from sequence identifier to its reaction identifiers
 * @throws IOException declared for compatibility with existing callers
 */
public static Map<String, List<String>> readSeqIDReactionRelationMap(String dockerPath) throws IOException {

    System.out.println("Starting to read seqIdsReactionsRelation file");

    Map<String,List<String>> seqIdReaction = new HashMap<String, List<String>>();

    String fileName = dockerPath+"Bigg_Files/Results/seqIdsReactionsRelation.txt";

    // try-with-resources closes the reader on every path.
    try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {

        String line;

        while ((line = br.readLine()) != null) {

            // Split on the first hyphen only, so hyphens inside reaction ids survive.
            int separator = line.indexOf('-');

            if (separator < 0)
                continue;

            String key = line.substring(0, separator).trim();

            if (key.isEmpty())
                continue;

            String value = line.substring(separator + 1).trim();

            seqIdReaction.put(key, Arrays.asList(value.split("\\s+")));
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

    System.out.println("file read");

    return seqIdReaction;
}
/**
......@@ -413,6 +733,51 @@ public class BiggDataCenter {
}
return map;
}
/**
 * Maps every query gene of the BLAST results to the BiGG genes of its hits.
 * <p>
 * The sequence-id/genes relation is loaded with
 * {@code readSeqIDGenesRelationFile}; for each BLAST target of a query gene, the
 * ';'-separated BiGG genes registered for that target are appended to the query gene's
 * list. Targets with no entry in the relation file are skipped.
 *
 * @param sequenceIdsGenesRelation name of the sequence-id/genes relation file
 * @param workFolderID             working folder path
 * @param blastResults             query gene mapped to the sequence ids it matched
 * @return map from query gene to the BiGG genes of all its matched sequence ids (a query
 *         gene with no resolvable target maps to an empty list)
 * @throws IOException if the relation file cannot be read
 */
public static Map<String, List<String>> queryGeneToBiggGeneRetriever(String sequenceIdsGenesRelation, String workFolderID, Map<String,Set<String>> blastResults) throws IOException{

    Map<String,String> seqIDGenesMap = BiggDataCenter.readSeqIDGenesRelationFile(sequenceIdsGenesRelation, workFolderID);

    Map<String,List<String>> map = new HashMap<String,List<String>>();

    for (Entry<String,Set<String>> result : blastResults.entrySet()) {

        List<String> biggGenesList = new ArrayList<String>();

        for(String gene : result.getValue()) {

            String biggGenes = seqIDGenesMap.get(gene);

            // A target missing from the relation file has no BiGG genes to contribute.
            if(biggGenes == null)
                continue;

            biggGenesList.addAll(Arrays.asList(biggGenes.split(";")));
        }

        map.put(result.getKey(), biggGenesList);
    }

    return map;
}
......
......@@ -25,63 +25,128 @@ public class BiggDataLoader {
private static HashMap<String,List<String>> metabolitesAliases = new HashMap<String,List<String>>();
private static List<String> allEnzymes;
private static HashMap<String,String> allEnzymesNames;
private static List<WorkspaceData> allModelsInformation = new ArrayList<WorkspaceData>();
private static ConcurrentLinkedQueue<ReactionContainer> allReactions = new ConcurrentLinkedQueue<ReactionContainer>();
private static ConcurrentLinkedQueue<MetaboliteContainer> allMetabolites = new ConcurrentLinkedQueue<MetaboliteContainer>();
private static ConcurrentLinkedQueue<String> allReactionsString = new ConcurrentLinkedQueue<String>();
private static ConcurrentLinkedQueue<String> allMetabolitesString = new ConcurrentLinkedQueue<String>();
private static HashMap<String,List<String>> allReactionsAliases = new HashMap<String,List<String>>();
public static void loadData(String model, String databaseName) throws Exception {
UpdateBiggFiles dataCenter = new UpdateBiggFiles(model);
public static HashMap<String,List<String>> loadAllData(List<String> listModels, String databaseName) throws Exception {
WorkspaceInitialData databaseInitialData = null;
WorkspaceData data = new WorkspaceData();
databaseInitialData = WorkspaceInitialDataServices.retrieveAllData(databaseName);
data.getResultCompartments().addAll((compartmentsTransformer(dataCenter.getModelMetabolites())));
data.setResultMetabolites(metabolitesTransformer(dataCenter.getModelMetabolites()));
data.setResultEnzymes(enzymesTransformer(dataCenter.getModelReactions()));
data.setResultReactions(reactionsTransformer(dataCenter.getModelReactions()));
for(String model : listModels)
allModelsInformation.add(loadData(model));
int numberOfProcesses = Runtime.getRuntime().availableProcessors();
List<Thread> threads = new ArrayList<Thread>();
System.out.println("Reading all models metabolites and reactions");
int c=1;
for (WorkspaceData data: allModelsInformation)
{
System.out.println(c); c++;
for(MetaboliteContainer met : data.getResultMetabolites()) {
if(!allMetabolitesString.contains(met.getExternalIdentifier())) {
allMetabolites.add(met);
allMetabolitesString.add(met.getExternalIdentifier());
}
}
for(ReactionContainer reac : data.getResultReactions()) {
if(!allReactionsString.contains(reac.getExternalIdentifier())) {
allReactions.add(reac);
allReactionsString.add(reac.getExternalIdentifier());
}
}
List<Runnable> runnables = new ArrayList<Runnable>();
}
System.out.println("Getting all models aliases");
databaseInitialData.setMetabolitesIdentifier(new ConcurrentHashMap<String,Integer>());
int z = 1;
for (WorkspaceData data: allModelsInformation)
{
System.out.println(z); z++;
data.setResultReactions(reactionsAliasesTester(data.getResultReactions()));
data.setResultMetabolites(metabolitesAliasesTester(data.getResultMetabolites()));
}
System.out.println("Starting insertion in database");
int x=1;
for(WorkspaceData data : allModelsInformation) {
System.out.println(x); x++;
int numberOfProcesses = Runtime.getRuntime().availableProcessors();
List<Thread> threads = new ArrayList<Thread>();
numberOfProcesses = Runtime.getRuntime().availableProcessors();
threads = new ArrayList<Thread>();
List<Runnable> runnables = new ArrayList<Runnable>();
numberOfProcesses = Runtime.getRuntime().availableProcessors();
threads = new ArrayList<Thread>();
long startTime = System.currentTimeMillis();
for(int i=0; i<numberOfProcesses; i++) {
Runnable loadBiggData = new BiggLoadMetabolicData(databaseName, data, databaseInitialData, cancel);
runnables.add(loadBiggData);
Thread thread = new Thread(loadBiggData);
threads.add(thread);
thread.start();
}
long startTime = System.currentTimeMillis();
WorkspaceInitialData databaseInitialData = null;
databaseInitialData = WorkspaceInitialDataServices.retrieveAllData(databaseName);
databaseInitialData.setMetabolitesIdentifier(new ConcurrentHashMap<String,Integer>());
for(Thread thread :threads)
thread.join();
numberOfProcesses = Runtime.getRuntime().availableProcessors();///2;
threads = new ArrayList<Thread>();
for(int i=0; i<numberOfProcesses; i++) {
Runnable loadBiggData = new BiggLoadMetabolicData(databaseName, data, databaseInitialData, cancel);
runnables.add(loadBiggData);
Thread thread = new Thread(loadBiggData);
threads.add(thread);
thread.start();
}
for(Thread thread :threads)
thread.join();
numberOfProcesses = Runtime.getRuntime().availableProcessors();///2;
threads = new ArrayList<Thread>();
for(Thread thread :threads)
thread.join();
for(Thread thread :threads)
thread.join();
long endTime2 = System.currentTimeMillis();
long startTime1 = System.currentTimeMillis();
long endTime1 = System.currentTimeMillis();
long endTime = System.currentTimeMillis();
long endTime2 = System.currentTimeMillis();
long startTime1 = System.currentTimeMillis();
long endTime1 = System.currentTimeMillis();
long endTime = System.currentTimeMillis();
System.out.println("Total elapsed time in execution of method Load Bigg data is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime2-startTime),TimeUnit.MILLISECONDS.toSeconds(endTime2-startTime) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime2-startTime))));
System.out.println("Total elapsed time in execution of method Load Bigg data is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime2-startTime),TimeUnit.MILLISECONDS.toSeconds(endTime2-startTime) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime2-startTime))));
System.out.println("Total elapsed time in execution of method build view is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime1-startTime1),TimeUnit.MILLISECONDS.toSeconds(endTime1-startTime1) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime1-startTime1))));
System.out.println("Total elapsed time in execution of method build view is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime1-startTime1),TimeUnit.MILLISECONDS.toSeconds(endTime1-startTime1) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime1-startTime1))));
System.out.println("Total elapsed time in execution of method TOTAL is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime-startTime),TimeUnit.MILLISECONDS.toSeconds(endTime-startTime) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime-startTime))));
System.out.println("Total elapsed time in execution of method TOTAL is :"+ String.format("%d min, %d sec",
TimeUnit.MILLISECONDS.toMinutes(endTime-startTime),TimeUnit.MILLISECONDS.toSeconds(endTime-startTime) -TimeUnit.MINUTES.toSeconds(TimeUnit.MILLISECONDS.toMinutes(endTime-startTime))));
}
return allReactionsAliases;
}
/**
 * Builds the {@link WorkspaceData} of a single model.
 * <p>
 * The model is read through {@link UpdateBiggFiles}; its metabolites feed the
 * compartments and metabolites of the result, and its reactions feed the
 * enzymes and reactions of the result.
 *
 * @param model identifier of the model handed to {@code UpdateBiggFiles}
 * @return the workspace data assembled for that model
 * @throws Exception propagated from the model loading or from the transformer helpers
 */
private static WorkspaceData loadData(String model) throws Exception {
    final UpdateBiggFiles modelFiles = new UpdateBiggFiles(model);
    final WorkspaceData workspace = new WorkspaceData();

    // metabolite-derived information
    workspace.getResultCompartments().addAll(compartmentsTransformer(modelFiles.getModelMetabolites()));
    workspace.setResultMetabolites(metabolitesTransformer(modelFiles.getModelMetabolites()));

    // reaction-derived information
    workspace.setResultEnzymes(enzymesTransformer(modelFiles.getModelReactions()));
    workspace.setResultReactions(reactionsTransformer(modelFiles.getModelReactions()));

    return workspace;
}
public void setCancel() {
......@@ -153,37 +218,42 @@ public class BiggDataLoader {
queue.add(container);
}
return metabolitesAliasesTester(queue);
return queue;
}
private static ConcurrentLinkedQueue<MetaboliteContainer> metabolitesAliasesTester(ConcurrentLinkedQueue<MetaboliteContainer> queue) {
for(MetaboliteContainer metabolite : queue) {
for(MetaboliteContainer existant : queue) {
if(existant.getName().equals(metabolite.getName()) &&
existant.getFormula().equals(metabolite.getFormula()) &&
existant.getName() != "" && existant.getFormula() != "" &&
!existant.getExternalIdentifier().equals(metabolite.getExternalIdentifier())){
if(metabolite.getNames()!=null) {
List<String> current = metabolitesAliases.get(metabolite.getExternalIdentifier());
current.add(existant.getExternalIdentifier());
metabolitesAliases.replace(metabolite.getExternalIdentifier(), current);
metabolite.getNames().add(existant.getExternalIdentifier());
}
else {
List<String> newNames = new ArrayList<String>();
newNames.add(existant.getExternalIdentifier());
metabolite.setNames(newNames);
metabolitesAliases.put(metabolite.getExternalIdentifier(), newNames);
}
private static ConcurrentLinkedQueue < MetaboliteContainer > metabolitesAliasesTester(ConcurrentLinkedQueue < MetaboliteContainer > queue)
{
for (MetaboliteContainer metabolite: queue)
{
for (MetaboliteContainer existant: allMetabolites)
{
if(!metabolite.getExternalIdentifier().equals(existant.getExternalIdentifier())) {
if(existant.getName().equals(metabolite.getName()) &&
existant.getFormula().equals(metabolite.getFormula()) &&
existant.getName() != "" && existant.getFormula() != "" &&
!existant.getExternalIdentifier().equals(metabolite.getExternalIdentifier())){
if(metabolite.getNames()!=null) {
List<String> current = metabolitesAliases.get(metabolite.getExternalIdentifier());
current.add(existant.getExternalIdentifier());
metabolitesAliases.replace(metabolite.getExternalIdentifier(), current);
metabolite.getNames().add(existant.getExternalIdentifier());
}
else {
List<String> newNames = new ArrayList<String>();
newNames.add(existant.getExternalIdentifier());
metabolite.setNames(newNames);
metabolitesAliases.put(metabolite.getExternalIdentifier(), newNames);
}
}
}
}
......@@ -478,17 +548,18 @@ public class BiggDataLoader {
}
return reactionsAliasesTester(queue);
return queue;
}
private static ConcurrentLinkedQueue<ReactionContainer> reactionsAliasesTester(ConcurrentLinkedQueue<ReactionContainer> queue) {
for(ReactionContainer reaction : queue) {
private static ConcurrentLinkedQueue < ReactionContainer > reactionsAliasesTester(ConcurrentLinkedQueue < ReactionContainer > queue)
{
for(ReactionContainer existant : queue) {
for (ReactionContainer reaction: queue)
{
if(reaction != existant) {
for (ReactionContainer existant: allReactions) {
if(!reaction.getExternalIdentifier().equals(existant.getExternalIdentifier())) {
List<MetaboliteContainer> reactantsReaction = reaction.getReactantsStoichiometry();
List<MetaboliteContainer> productsReaction = reaction.getProductsStoichiometry();
......@@ -498,6 +569,12 @@ public class BiggDataLoader {
int reactNr = reactantsReaction.size();
int prdNr = productsReaction.size();
int existantReactNr = reactantsExistant.size();
int existantPrdNr = productsExistant.size();
if(reactNr != existantReactNr || prdNr != existantPrdNr)
continue;
int equalReactants=0;
int equalProducts=0;
......@@ -509,22 +586,26 @@ public class BiggDataLoader {
if(skip == true)
break;
Boolean reactantsExistantValidated = false;
for(MetaboliteContainer metExi : reactantsExistant) {
if(metReac.getExternalIdentifier().equals(metExi.getExternalIdentifier())) {
equalReactants++;
reactantsExistantValidated=true;
break;
}else if(metabolitesAliases.get(metExi.getExternalIdentifier()) != null) {
if(metabolitesAliases.get(metExi.getExternalIdentifier()).contains(metReac.getExternalIdentifier())){
equalReactants++;
reactantsExistantValidated=true;
break;
}
}else {
skip = true;
break;
}
}
if(!reactantsExistantValidated)
skip = true;
}
if(skip == true)
......@@ -533,23 +614,28 @@ public class BiggDataLoader {
for(MetaboliteContainer proReac : productsReaction) {
if(skip == true)
break;
Boolean productsExistantValidated = false;
for(MetaboliteContainer proExi : productsExistant) {
if(proReac.getExternalIdentifier().equals(proExi.getExternalIdentifier())) {
equalProducts++;
productsExistantValidated = true;
break;
}else if(metabolitesAliases.get(proExi.getExternalIdentifier()) != null) {
if(metabolitesAliases.get(proExi.getExternalIdentifier()).contains(proReac.getExternalIdentifier())){
equalProducts++;
productsExistantValidated = true;
break;
}
}else {
skip = true;;
break;
}
}
if(!productsExistantValidated)
skip = true;
}
if(skip==true)
......@@ -565,6 +651,26 @@ public class BiggDataLoader {
newNames.add(existant.getExternalIdentifier());
reaction.setNames(newNames);
}
//Fill map to create a file with unique entries - similar reactions will be represented only once, the other will be associated with the first one
if(allReactionsAliases.containsKey(reaction.getExternalIdentifier())){
if(!allReactionsAliases.get(reaction.getExternalIdentifier()).contains(existant.getExternalIdentifier())) {
List<String> existantAliases = allReactionsAliases.get(reaction.getExternalIdentifier());
existantAliases.add(existant.getExternalIdentifier());
allReactionsAliases.put(reaction.getExternalIdentifier(), existantAliases);
}
}else {
List<String> existantAliases = new ArrayList<String>();
existantAliases.add(existant.getExternalIdentifier());
allReactionsAliases.put(reaction.getExternalIdentifier(), existantAliases);
}
}
......
......@@ -165,7 +165,7 @@ public class BiggModelDataLoader {
* @throws Exception
*/
public void loadBiggReaction(ReactionContainer reactionContainer) throws Exception {
if(this.databaseInitialData.getReactionsIdentifier().containsKey(reactionContainer.getExternalIdentifier())) {
boolean inModel = false;
......@@ -210,11 +210,10 @@ public class BiggModelDataLoader {
boolean go = true;
for(MetaboliteContainer reactant : reactionContainer.getReactantsStoichiometry()) {
String metaboliteID = null;
metaboliteID = reactant.getExternalIdentifier();
Integer metabolite_id = ModelMetabolitesServices.getCompoundIDbyExternalIdentifier(databaseName, metaboliteID);
if(metabolite_id<0)
......
......@@ -22,7 +22,7 @@ public class BiggModelMetabolitesServices extends ModelMetabolitesServices {
public static void loadMetabolites(String databaseName, ConcurrentLinkedQueue<MetaboliteContainer> metabolites, ConcurrentHashMap<String,Integer> metabolites_id) throws Exception {
for (MetaboliteContainer metaboliteContainer : metabolites) {
int res = getCompoundIDbyExternalIdentifier(databaseName, metaboliteContainer.getExternalIdentifier());
if(res>=0)
......@@ -43,7 +43,6 @@ public class BiggModelMetabolitesServices extends ModelMetabolitesServices {
metaboliteContainer.setMetaboliteID(id);
}
}
}
......
......@@ -4,6 +4,7 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
......@@ -32,6 +33,9 @@ import pt.uminho.ceb.biosystems.merlin.gui.utilities.NewWorkspaceRequirements;
import pt.uminho.ceb.biosystems.merlin.services.DatabaseServices;
import pt.uminho.ceb.biosystems.merlin.utilities.io.FileUtils;
import org.biojava.bio.symbol.*;
import org.biojava.bio.seq.*;
public class UpdateBiggFiles {
private static BiggModels model = new BiggModels();
......@@ -48,6 +52,7 @@ public class UpdateBiggFiles {
private static File modelsFile = new File(modelsPath);
private static File resultsFile = new File(resultsPath);
private static Map<String,String> updatedModels = new HashMap<String,String>();
private static Map<String,List<String>> reactionsAliases = new HashMap<String,List<String>>();
public static void main(String[] args) throws Exception {
......@@ -58,27 +63,31 @@ public class UpdateBiggFiles {
System.out.println("----starting get models details-----");
getModelsDetails(resultModel);
// if(resultsNeedUpdate) {
//
// writeHistoryFile();
//
// verifyResultsFolder();
//
// System.out.println("----genes reaction relation-----");
// genesReactionsRelation(resultModel);
// System.out.println("----creating fasta file-----");
// createFastaFile(resultModel);
// System.out.println("----Seq Id reaction relation-----");
// writeSeqIDReactionRelationFile();
// System.out.println("----Genes reactions rule file-----");
// genesReactionsRuleFile();
// System.out.println("----Models organisms file-----");
// getModelsOrganismsFile(resultModel);
//
// zipResultsFolder();
if(resultsNeedUpdate) {
writeHistoryFile();
exportBiggDatabase();
// }
verifyResultsFolder();
System.out.println("----reactions aliases file-----");
writeReactionsAliases();
System.out.println("----genes reaction relation-----");
genesReactionsRelation();
System.out.println("----creating fasta file-----");
createFastaFile(resultModel);
System.out.println("----Seq Id reaction relation-----");
writeSeqIDReactionRelationFile();
System.out.println("----Genes reactions rule file-----");
genesReactionsRuleFile();
System.out.println("----Models organisms file-----");
getModelsOrganismsFile(resultModel);
zipResultsFolder();
}
......@@ -360,7 +369,7 @@ public class UpdateBiggFiles {
* @return
* @throws Exception
*/
private static BiggModels readSpecificModelDetails(String model) throws Exception{
public static BiggModels readSpecificModelDetails(String model) throws Exception{
BiggModels biggModel = new BiggModels();
try {
......@@ -416,9 +425,8 @@ public class UpdateBiggFiles {
* @param resultModel
* @throws IOException
*/
private static void genesReactionsRelation(String resultModel) throws IOException {
private static void genesReactionsRelation() throws IOException {
JSONObject geneReactions = new JSONObject();
List<String> allGenes = new ArrayList<String>();
try {
......@@ -435,19 +443,55 @@ public class UpdateBiggFiles {
//model genes
biggModel.setGenes(readModelGenes(modelDetails.getJSONObject("gene")));
biggModel.setReactions(readModelReactions(modelDetails.getJSONObject("reaction")));
List<BiggReactions> biggReactions = biggModel.getReactions();
for(BiggGenes gene : biggModel.getGenes()) {
if(allGenes.contains(gene.getBiggId()))
continue;
else
allGenes.add(gene.getBiggId());
List<String> listReactionsBiggIds = new ArrayList<String>();
for(BiggReactions reaction : gene.getReaction()) {
listReactionsBiggIds.add(reaction.getBiggId());
for(BiggReactions reac : biggReactions) {
if(reac.getBiggId().equals(reaction.getBiggId())) {
for(BiggReactionResult result : reac.getResults()) {
listReactionsBiggIds.add(result.getExportedReactionId());
}
}
}
}
geneReactions.put(gene.getBiggId(), listReactionsBiggIds);
if(geneReactions.has(gene.getBiggId())) {
JSONArray jsonReactions = (JSONArray) geneReactions.get(gene.getBiggId());
ArrayList<String> currentReactions = new ArrayList<String>();
JSONArray jArray = (JSONArray)jsonReactions;
if (jArray != null) {
for (int i=0;i<jArray.length();i++){
currentReactions.add(jArray.getString(i));
}
}
for(String reaction : listReactionsBiggIds) {
if(!currentReactions.contains(reaction))
currentReactions.add(reaction);
}
geneReactions.remove(gene.getBiggId());
geneReactions.put(gene.getBiggId(), currentReactions);
}else
geneReactions.put(gene.getBiggId(), listReactionsBiggIds);
}
}
......@@ -484,7 +528,7 @@ public class UpdateBiggFiles {
int idNr=1;
for (String model : modelBiggIds) {
try {
//get current model details
File f = new File(modelsPath+model+".txt");
......@@ -503,27 +547,57 @@ public class UpdateBiggFiles {
biggModel.setGenes(readModelGenes(modelDetails.getJSONObject("gene")));
for(BiggGenes gene:biggModel.getGenes()){
//to check if this gene was already validated in another model
if(addedGenes.contains(gene.getBiggId()))
continue;
else
addedGenes.add(gene.getBiggId());
if(!addedSequences.contains(gene.getProteinSequence())){
String sequence = "";
if(gene.getProteinSequence() != null && gene.getProteinSequence() != ""){
sequence = gene.getProteinSequence();
}else {
if(gene.getDnaSequence() != null && gene.getDnaSequence() != "") {
try {
//create a DNA SymbolList
SymbolList symL = DNATools.createDNA(gene.getDnaSequence());
//transcribe to RNA
symL = DNATools.toRNA(symL);
//translate to protein
symL = RNATools.translate(symL);
sequence = symL.seqString();
sequence = sequence.replace("*", "");
}catch (IllegalAlphabetException ex) {
System.out.println(ex.getMessage());
}catch (IllegalSymbolException ex) {
System.out.println(ex.getMessage());
}catch (IllegalArgumentException ex) {
System.out.println(ex.getMessage());
}
}
}
if(!addedSequences.contains(sequence)){
//first time encounter
addedSequences.add(gene.getProteinSequence());
String formatted = "g"+String.format("%05d", idNr);
addedSequences.add(sequence);
String formatted = "g"+String.format("%09d", idNr);
currentSeqId = formatted;
sequenceIdsProteinSequence.put(gene.getProteinSequence(),formatted );
sequenceIdsProteinSequence.put(sequence,formatted );
sequenceIds.put(formatted, gene.getBiggId());
writeFastaFile(writer, gene.getProteinSequence(), currentSeqId);
writeFastaFile(writer, sequence, currentSeqId);
idNr++;
}else{
//already exists - writes in another file (repeatedSequences)
String seqId = sequenceIdsProteinSequence.get(gene.getProteinSequence());
String seqId = sequenceIdsProteinSequence.get(sequence);
if(!repeatedSequences.contains(gene)) {
repeatedSequences.add(gene);
......@@ -581,7 +655,7 @@ public class UpdateBiggFiles {
String biggIdsJoined = seqIdsGenesRelation.get(entry.getKey());
List<String> biggIdsSplitted = Arrays.asList(biggIdsJoined.split(";"));
biggIdsSplitted.replaceAll(String::trim);
for(String geneBiggId : biggIdsSplitted) {
......@@ -1059,20 +1133,27 @@ public class UpdateBiggFiles {
biggModel.setReactions(readModelReactions(modelDetails.getJSONObject("reaction"))); //Get all the reaction
for(BiggReactions reaction : biggModel.getReactions()) {
List<String> reactions = new ArrayList<String>();
if(reaction.getBiggId().equals("GLCtex") && model.equals("iAF1260"))
System.out.println();
if(reaction.getBiggId().equals("GLCtex"))
System.out.println();
List<BiggReactionResult> results = reaction.getResults();
for(BiggReactionResult result: results) {
List<String> reactions = new ArrayList<String>();
if(result.getGeneReactionRule().equals(null) || result.getGeneReactionRule().equals(""))
continue;
if(!geneReactionRuleMap.containsKey(reaction.getBiggId())) {//If the key (Gene ID) doesn't exist already
if(!geneReactionRuleMap.containsKey(result.getExportedReactionId())) {//If the key (Gene ID) doesn't exist already
if(reactions.isEmpty()) {//If it's the first result, just uses the value
reactions.add(result.getGeneReactionRule());
geneReactionRuleMap.put(reaction.getBiggId(), result.getGeneReactionRule());
geneReactionRuleMap.put(result.getExportedReactionId(), result.getGeneReactionRule());
}
else { //Else, checks if the value is different and updates
......@@ -1080,24 +1161,24 @@ public class UpdateBiggFiles {
continue;
else {
reactions.add(result.getGeneReactionRule());
geneReactionRuleMap.put(reaction.getBiggId(), geneReactionRuleMap.get(reaction.getBiggId()) + " or " + result.getGeneReactionRule());
geneReactionRuleMap.put(result.getExportedReactionId(), geneReactionRuleMap.get(reaction.getBiggId()) + " or " + result.getGeneReactionRule());
}
}
geneReactionRuleMapList.put(reaction.getBiggId(), reactions);
geneReactionRuleMapList.put(result.getExportedReactionId(), reactions);
}
else {//If already exists
reactions = geneReactionRuleMapList.get(reaction.getBiggId());
reactions = geneReactionRuleMapList.get(result.getExportedReactionId());
if(reactions.contains(result.getGeneReactionRule()))
continue;
else {
reactions.add(result.getGeneReactionRule());
geneReactionRuleMap.put(reaction.getBiggId(), geneReactionRuleMap.get(reaction.getBiggId()) + " or " + result.getGeneReactionRule());
geneReactionRuleMap.put(result.getExportedReactionId(), geneReactionRuleMap.get(result.getExportedReactionId()) + " or " + result.getGeneReactionRule());
}
geneReactionRuleMapList.put(reaction.getBiggId(), reactions);
geneReactionRuleMapList.put(result.getExportedReactionId(), reactions);
}
......@@ -1172,10 +1253,7 @@ public class UpdateBiggFiles {
public static void searchAndLoad(String databaseName){
try {
for(String model : modelBiggIds) {
BiggDataLoader.loadData(model, databaseName);
}
reactionsAliases = BiggDataLoader.loadAllData(modelBiggIds, databaseName);
}
catch (Exception e) {
e.printStackTrace();
......@@ -1251,6 +1329,56 @@ public class UpdateBiggFiles {
org.apache.commons.io.FileUtils.deleteDirectory(file);
}
/**
 * Writes the reactions aliases file ({@code reactionsAliases.txt}) into the results folder.
 * <p>
 * Every group of equivalent reactions is written only once: the first reaction of a
 * group met while iterating {@code reactionsAliases} is kept as the representative,
 * and the entries keyed by any of its aliases are dropped. Each remaining entry
 * produces one line of the form {@code <reactionId> - <alias1>; <alias2>; }.
 * <p>
 * NOTE(review): {@code reactionsAliases} is a {@code HashMap}, so which reaction of a
 * group ends up as the representative depends on its iteration order.
 *
 * @throws FileNotFoundException if the output file cannot be created or opened
 */
private static void writeReactionsAliases() throws FileNotFoundException {

    // working copy: entries keyed by an alias of an already kept reaction are removed from it
    Map<String,List<String>> auxReactionsAliases = new HashMap<String, List<String>>(reactionsAliases);

    List<String> validatedReactions = new ArrayList<String>();

    for (Entry<String,List<String>> entry : reactionsAliases.entrySet()) {

        // already handled, either as a representative or as an alias of one
        if (validatedReactions.contains(entry.getKey()))
            continue;

        validatedReactions.add(entry.getKey());

        for (String alias : entry.getValue()) {
            validatedReactions.add(alias);
            auxReactionsAliases.remove(alias);
        }
    }

    // try-with-resources: the writer is closed even if writing fails half way
    try (PrintWriter writer = new PrintWriter(resultsPath+"reactionsAliases.txt")) {

        for (Entry<String,List<String>> entry : auxReactionsAliases.entrySet()) {

            StringBuilder aliases = new StringBuilder();

            for (String alias : entry.getValue())
                aliases.append(alias).append("; ");

            writer.println(entry.getKey() + " - " + aliases);
        }
    }
}
public BiggModels getModel() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment