GitLab CI for Docker builds is enabled! You can enable it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit d0a77108 authored by Davide Lagoa
Browse files

triage neo4j database created

parent 365068ef
......@@ -8,25 +8,41 @@
</description>
<repositories>
<repository>
<id>nexus</id>
<name>Nexus BioSystems Server</name>
<!-- <url>http://192.168.1.99/nexus/content/groups/public/</url> -->
<url>http://193.137.11.210/nexus/content/groups/public/</url>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<!-- <dependency>
<groupId>utilities</groupId>
<artifactId>triage-utilities</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency> -->
<dependency>
<groupId>tcdb</groupId>
<artifactId>triage</artifactId>
<version>0.0.1-SNAPSHOT</version>
</dependency>
<dependency>
......@@ -92,6 +108,18 @@
<groupId>pt.uminho.sysbio.biosynthframework</groupId>
<artifactId>biosynth-integration</artifactId>
<version>0.9.0-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
<version>3.4.1</version>
</dependency>
<dependency>
......@@ -135,6 +163,6 @@
<artifactId>slf4j-api</artifactId>
<version>1.8.0-beta1</version>
</dependency>
</dependencies>
</project>
......@@ -11,6 +11,7 @@ import java.util.Set;
import com.google.common.collect.ImmutableList;
import containers.InternalMetabolitesProperties;
import enumerators.MetaboliteReferenceDatabaseEnum;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import tcdb.capsules.BiosynthMetaboliteProperties;
......
package internalDB;
/**
 * Names of the metabolite reference databases that are declared as constants
 * in this project.
 *
 * <p>The declaration order is kept stable on purpose, so that
 * {@code ordinal()} and {@code values()} keep returning the same results.
 * Constant names are used verbatim (mixed case included), so
 * {@code valueOf(String)} is case-sensitive.
 */
public enum MetaboliteReferenceDatabaseEnum {

    // ---- kegg ----
    KEGGCompound,
    LIGANDCPD,      // also kegg
    KEGG,
    KEGGGLYCAN,
    KEGGDrug,

    // ---- ModelSEED ----
    SEEDCompound,

    // ---- Metacyc ----
    Biocyc,
    bigg2,
    Ecocyc

    /*
     * ALL DATABASES
     *
     * Reference list of database names kept next to the enum; only the
     * constants declared above are part of the type.
     *
     *   HumanMetabolomeDatabase
     *   LIPIDMAPS
     *   iJR904
     *   PUBCHEMSID
     *   JCGGDB
     *   KEGGGLYCAN
     *   GlycoEpitope
     *   CHEBI
     *   LipidBank
     *   KEGG
     *   KNAPSACK
     *   iAF692
     *   iAF1260
     *   KEGGCompound
     *   Reactome
     *   CCSD
     *   REFMET
     *   CAS
     *   NCI
     *   PDBCCD
     *   iSB619
     *   bigg2
     *   LipidMaps
     *   GlyTouCan
     *   BioCyc
     *   iND750
     *   iIT341
     *   3DMET
     *   ChEBI
     *   Wikipedia
     *   KEGGDrug
     *   KNApSAcK
     *   METABOLIGHTS
     *   PubChem
     *   LIPID_MAPS
     *   ChEMBL
     *   HMDB
     *   BRENDACOMPOUND
     *   CHEMSPIDER
     *   Recon1
     *   PUBCHEM
     *   SEEDCompound
     *   UMBBDCPD
     *   ECOCYC
     *   MetaNetX(MNX)Chemical
     *   iNJ661
     *   LIGANDCPD
     *   NIKKAJI
     *   DRUGBANK
     */
}
package internalDB;
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Observable;
import java.util.Observer;
import java.util.Scanner;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.biojava.nbio.core.sequence.io.FastaReaderHelper;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
......@@ -13,8 +22,16 @@ import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;
import biosynth.initializeNeo4jdb;
import blast.Blast;
import containers.BiosynthMetabolites;
import enumerators.MetaboliteReferenceDatabaseEnum;
import files.FilesUtils;
import files.JSONFilesUtils;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ncbi.CreateGenomeFile;
//import pt.uminho.ceb.biosystems.merlin.local.alignments.core.RunSimilaritySearch;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.AlignmentScoreType;
import pt.uminho.ceb.biosystems.merlin.utilities.Enumerators.Method;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import pt.uminho.sysbio.biosynth.integration.neo4j.BiodbMetaboliteNode;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
......@@ -22,7 +39,9 @@ import reactions.IdentifyReactionsMetabolites;
import reactions.TransportReactionsBuilder;
import tcdb.capsules.BiosynthMetaboliteProperties;
import tcdb.capsules.TcNumberContainer2;
import triageDatabase.PopulateTriageNeo4jDatabase;
import utilities.FileUtils;
import utilities.triage_utilities.Utilities;
/**
* @author Davide
......@@ -102,25 +121,57 @@ public class WriteByMetabolitesID {
public static void test(Map<String, BiosynthMetaboliteProperties> data, BiodbGraphDatabaseService service, BiosynthMetabolites namesAndIDsContainer) {
try {
/////TRIAGE
Map<String, Set<TcNumberContainer2>> reactionsData = JSONFilesUtils.readJSONtcdbReactionsFile();
IdentifyReactionsMetabolites metabolitesIdentification = new IdentifyReactionsMetabolites(reactionsData, namesAndIDsContainer, service);
Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs = metabolitesIdentification.getTcdbMetabolitesIDs();
Map<String, String[]> forChildsSearch = metabolitesIdentification.getforChildsSearch();
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service).getResults();
// JSONFilesUtils.writeJSONTriageReactions(newData);
// Map<String, Set<TcNumberContainer2>> newData2 = new HashMap<>();
//
// newData2.put("Q8SKU2", newData.get("Q8SKU2"));
// newData2.put("Q9MUK5", newData.get("Q9MUK5"));
// newData2.put("Q9SC41", newData.get("Q9SC41"));
// newData2.put("O43715", newData.get("O43715"));
// newData2.put("O24303", newData.get("O24303"));
// newData2.put("Q41010", newData.get("Q41010"));
// newData2.put("Q41009", newData.get("Q41009"));
// newData2.put("P0C891", newData.get("P0C891"));
// newData2.put("Q02028", newData.get("Q02028"));
// newData2.put("O49931", newData.get("O49931"));
// newData2.put("Q9ZST8", newData.get("Q9ZST8"));
// newData2.put("Q9ZST9", newData.get("Q9ZST9"));
new PopulateTriageNeo4jDatabase(newData);
/////TRIAGE
// Set<String> tcdbMetabolites = ReactionsMetabolites.getMetabolitesFromReactions(reactionsData);
//
// System.out.println("TOTAL FOR SEARCH: " + tcdbMetabolites.size());
//
// Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs = ReactionsMetabolites.getMetabolitesIDs(tcdbMetabolites, namesAndIDsContainer, service);
Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs = new IdentifyReactionsMetabolites(reactionsData, namesAndIDsContainer, service).getTcdbMetabolitesIDs();
TransportReactionsBuilder.generateTransportReactions(tcdbMetabolitesIDs, reactionsData);
//BLAST
// Blast blast = new Blast();
//BLAST
// results = run_smith_waterman.runTransportSearch(querySpecificThreshold);
//{366869=organic cations, 367055=organic cations, 366939=organic cations, 367113=organic cations, 1462=Na+, 366996=organic cations, 366868=organic cations}
// None node
// System.out.println("TOTAL FOUND: " + tcdbMetabolitesIDs.size());
......@@ -331,7 +382,6 @@ public class WriteByMetabolitesID {
// System.out.println("saving...");
//
// FindMetabolitesID.saveInfoInFile2("C:\\\\Users\\\\Davide\\\\Documents\\\\InternalDB\\\\AllBiosynthMetabolites.txt", allMetabolites);
System.out.println("Done!!!");
}
catch (Exception e) {
......@@ -495,9 +545,9 @@ public class WriteByMetabolitesID {
if(useCache) {
namesLowerCaseWithoutSigns = FileUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns2.txt");
namesWithoutSigns = FileUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns2.txt");
namesLowerCase = FileUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase2.txt");
namesLowerCaseWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCaseWithoutSigns2.txt");
namesWithoutSigns = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesWithoutSigns2.txt");
namesLowerCase = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getNamesLowerCase2.txt");
compounds = FileUtils.readMapFromFile2("C:\\Users\\Davide\\Documents\\InternalDB\\info\\getMetabolitesIDs2.txt");
......@@ -744,9 +794,7 @@ public class WriteByMetabolitesID {
for(String name : names.split(";")) {
if(!name.isEmpty()) {
synonyms.add(name.replaceAll("<i>", "").replaceAll("</i>", "").replaceAll("\\[", "")
.replaceAll("\\]", "").replaceAll("</sup>", "").replaceAll("<sup>", "").replaceAll("</I>", "").replaceAll("<I>", "")
.replaceAll("</SUP>", "").replaceAll("<SUP>", "").replaceAll("</sub>", "").replaceAll("<sub>", "").replaceAll("^(an*\\s+)", ""));
synonyms.add(Utilities.processBiosynthName(name));
}
}
......
......@@ -24,6 +24,7 @@ public class IdentifyReactionsMetabolites {
private Map<String, Map<MetaboliteMajorLabel, String>> allMetabolitesByName;
private Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs;
private Set<String> metabolites;
private Map<String, String[]> forChildsSearch;
Synonyms dictionary;
......@@ -59,6 +60,7 @@ public class IdentifyReactionsMetabolites {
public Map<String, Map<String, MetaboliteMajorLabel>> getMetabolitesIDs(Set<String> metabolitesFromFile, BiosynthMetabolites namesAndIDsContainer, BiodbGraphDatabaseService service) {
tcdbMetabolitesIDs = new HashMap<>();
this.forChildsSearch = new HashMap<>();
// Set<String> forSearch = new HashSet<>(metabolites);
......@@ -393,29 +395,45 @@ public class IdentifyReactionsMetabolites {
ReactionContainer reactionContainer = tcContainer.getReactionContainer(id);
String reaction = reactionContainer.getReaction();
metabolites.addAll(getMetabolitesToBeReplaced(reaction));
reaction = reaction.replaceAll(ReactionContainer.REV_TOKEN, "\\+").replaceAll(ReactionContainer.IRREV_TOKEN, "\\+")
.replaceAll("\\(" + ReactionContainer.INTERIOR_COMPARTMENT + "\\)", "").replaceAll("\\(" + ReactionContainer.EXTERIOR_COMPARTMENT + "\\)", "");
}
}
}
String[] metabs = reaction.split(" \\+ ");
return metabolites;
for(String metab : metabs) {
}
/**
* Get metabolites present in one reaction
*
* @param reaction
* @return
*/
public static Set<String> getMetabolitesToBeReplaced(String reaction){
Set<String> metabolites = new HashSet<>();
reaction = reaction.replaceAll(ReactionContainer.REV_TOKEN, "\\+").replaceAll(ReactionContainer.IRREV_TOKEN, "\\+")
.replaceAll("\\(" + ReactionContainer.INTERIOR_COMPARTMENT + "\\)", "").replaceAll("\\(" + ReactionContainer.EXTERIOR_COMPARTMENT + "\\)", "");
metab = metab.replaceAll("^(\\+\\s)", "");
String[] metabs = reaction.split(" \\+ ");
// if(metab.trim().equals("+ Fe2+"))
// System.out.println(reactionContainer.getReaction() + "\t" + tcContainer.getTcNumber());
for(String metab : metabs) {
metabolites.add(metab.trim());
}
metab = metab.replaceAll("^(\\+\\s)", "");
// if(metab.trim().equals("+ Fe2+"))
// System.out.println(reactionContainer.getReaction() + "\t" + tcContainer.getTcNumber());
}
}
metabolites.add(metab.trim());
}
return metabolites;
}
/**
......@@ -515,10 +533,20 @@ public class IdentifyReactionsMetabolites {
* @param ids
* @return
*/
private static MetaboliteMajorLabel selectMetaboliteMajorLabel(String metabolite, Map<MetaboliteMajorLabel, String> ids){
private MetaboliteMajorLabel selectMetaboliteMajorLabel(String metabolite, Map<MetaboliteMajorLabel, String> ids){
try {
if(ids.containsKey(MetaboliteMajorLabel.MetaCyc)) {
String[] entry = new String[2];
entry[0] = ids.get(MetaboliteMajorLabel.MetaCyc);
entry[1] = MetaboliteMajorLabel.MetaCyc.toString();
forChildsSearch.put(metabolite, entry);
}
if(ids.containsKey(MetaboliteMajorLabel.ModelSeed))
return MetaboliteMajorLabel.ModelSeed;
......@@ -552,4 +580,12 @@ public class IdentifyReactionsMetabolites {
public Map<String, Map<String, MetaboliteMajorLabel>> getTcdbMetabolitesIDs() {
    // Hands back the instance's own map (no copy is made).
    return this.tcdbMetabolitesIDs;
}
/**
 * Accessor for the map used to look up metabolite children.
 *
 * <p>Entries are added by {@code selectMetaboliteMajorLabel}: the key is the
 * metabolite and the value is a two-element array holding the MetaCyc entry
 * ID at index 0 and the label name at index 1.
 *
 * @return the forChildsSearch map held by this instance (not a copy)
 */
public Map<String, String[]> getforChildsSearch() {
    return this.forChildsSearch;
}
}
......@@ -10,7 +10,7 @@ import java.util.TreeMap;
import connection.ReadTcdbFastaFile;
import files.ReadExcelFile;
import pt.uminho.sysbio.common.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import tcdb.capsules.FastaTcdb;
import tcdb.capsules.GeneContainer;
import tcdb.capsules.TcdbGene;
......
package reactions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.neo4j.graphdb.Node;
import files.FilesUtils;
import pt.uminho.sysbio.biosynth.integration.io.dao.neo4j.MetaboliteMajorLabel;
import pt.uminho.sysbio.biosynthframework.BiodbGraphDatabaseService;
import relations.MetabolitesChilds;
import tcdb.capsules.BiosynthMetaboliteProperties;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.TcNumberContainer2;
import tcdb.capsules.TcdbMetabolitesContainer;
import utilities.FileUtils;
import utilities.triage_utilities.Utilities;
public class TransportReactionsBuilder {
Map<String, String> reactionsIDsMap;
Map<String, Set<TcNumberContainer2>> newData;
public TransportReactionsBuilder(Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs,
Map<String, Set<TcNumberContainer2>> reactionsData, Map<String, String[]> forChildsSearch, BiodbGraphDatabaseService service) {
this.newData = new HashMap<>();
this.reactionsIDsMap = FilesUtils.readMapFromFile("C:\\Users\\Davide\\Documents\\InternalDB\\reactionsIDS.txt");
generateTransportReactions(tcdbMetabolitesIDs, reactionsData, forChildsSearch, service);
// Map<String, Set<TcNumberContainer2>> reactionsData2 = new HashMap<>(); //for debug purposes
// reactionsData2.put("Q8WY22", reactionsData.get("Q8WY22"));
// generateTransportReactions(tcdbMetabolitesIDs, reactionsData2, forChildsSearch, service);
FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\reactionsIDS.txt", reactionsIDsMap);
FilesUtils.saveMapInFile("C:\\Users\\Davide\\Documents\\InternalDB\\".concat(FilesUtils.generateFileName("reactionsIDS", ".txt")), reactionsIDsMap); //backup
// Map<String, Set<TcNumberContainer2>> reactionsData = new HashMap<>();
//
// reactionsData.put("P29897", reactionsDataaaaaa.get("P29897"));
}
private void generateTransportReactions(Map<String, Map<String, MetaboliteMajorLabel>> tcdbMetabolitesIDs,
Map<String, Set<TcNumberContainer2>> reactionsData, Map<String, String[]> forChildsSearch, BiodbGraphDatabaseService service) {
Map<String, Set<Long>> childs = new HashMap<>();
Map<String, String> childNames = new HashMap<>();
Map<String, String> metabolites = new HashMap<>();
int reactionsCounter = 0;
Set<String> uniqueReactions = new HashSet<>();
Set<Integer> uniqueReactionsHashCode = new HashSet<>();
for(String accession : reactionsData.keySet()) {
Set<TcNumberContainer2> setTcContainers = new HashSet<>();
for(TcNumberContainer2 tcContainer : reactionsData.get(accession)) {
TcNumberContainer2 newTcContainer = new TcNumberContainer2(tcContainer.getTcNumber());
// System.out.println("TCNUMER: " + tcContainer.getTcNumber());
for(Integer key : tcContainer.getAllReactionsIds()) {
// System.out.println();
try {
Map<String, String> reverseKeys = new HashMap<>();
// Map<String, String> metabolites = new HashMap<>();
List<Set<String>> sets = new ArrayList<>();
ReactionContainer reactionContainer = tcContainer.getReactionContainer(key);
// System.out.println("ReactionID: " + key + "\t---> " + reactionContainer.getReaction());
Set<String> metabolitesForReplacement = IdentifyReactionsMetabolites.getMetabolitesToBeReplaced(reactionContainer.getReaction());
// System.out.println("metabolitesFor replac: " + metabolitesForReplacement );
for(String metabolite : metabolitesForReplacement) {
// System.out.println(metabolite);
// System.out.println("contains >> " + tcdbMetabolitesIDs.containsKey(metabolite));
if(tcdbMetabolitesIDs.containsKey(metabolite)) {
String entryID = "";
MetaboliteMajorLabel label = null;
for(String id : tcdbMetabolitesIDs.get(metabolite).keySet()) { //this map will always have only one entry
label = tcdbMetabolitesIDs.get(metabolite).get(id);
entryID = id;
}
// System.out.println(entryID + "\t" + label);
metabolites.put(metabolite, entryID.concat("=").concat(label.toString()));
// System.out.println("childs.containsKey(metabolite): " + childs.containsKey(metabolite));
if(!childs.containsKey(metabolite)) {
if(!label.equals(MetaboliteMajorLabel.MetaCyc) && forChildsSearch.containsKey(metabolite)) {
entryID = forChildsSearch.get(metabolite)[0];
label = MetaboliteMajorLabel.valueOf(forChildsSearch.get(metabolite)[1]);
}
childs.put(metabolite, MetabolitesChilds.getMetaboliteChilds(entryID, label, service));
}
if(childs.containsKey(metabolite)) {
Set<String> childsAsString = Utilities.convertSetToSetString(childs.get(metabolite));
for(String childID : childsAsString) {
reverseKeys.put(childID, metabolite);
if(!childNames.containsKey(metabolite)) {
String[] properties = getMetaboliteProperties(childID, service);
String childName = properties[0];
String childEntryID = properties[1];
String childLabel = properties[2];
// System.out.println(childEntryID.concat("=").concat(childLabel));
childNames.put(childID, childName);
metabolites.put(childID, childEntryID.concat("=").concat(childLabel));
}
}
sets.add(childsAsString);
}
else {
metabolites.put(metabolite, metabolite);
Set<String> met = new HashSet<>();
met.add(metabolite);
reverseKeys.put(metabolite, metabolite);
if(!childNames.containsKey(metabolite))
childNames.put(metabolite, metabolite);
sets.add(met);
}
}