GitLab CI for Docker builds is available! You can enable it by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit db3ad775 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

uniprotAPI to search taxonomy

parent 09194e03
...@@ -121,6 +121,8 @@ public class WriteByMetabolitesID { ...@@ -121,6 +121,8 @@ public class WriteByMetabolitesID {
Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(-1, true, tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data).getResults(); Map<String, Set<TcNumberContainer2>> newData = new TransportReactionsBuilder(-1, true, tcdbMetabolitesIDs, reactionsData, forChildsSearch, service, data).getResults();
JSONFilesUtils.writeJSONTriageReactions(newData);
////COUNTS ////COUNTS
// MetabolitesChilds.getMetaboliteChilds(-1, "META:Ions", MetaboliteMajorLabel.MetaCyc, service); // MetabolitesChilds.getMetaboliteChilds(-1, "META:Ions", MetaboliteMajorLabel.MetaCyc, service);
...@@ -166,8 +168,6 @@ public class WriteByMetabolitesID { ...@@ -166,8 +168,6 @@ public class WriteByMetabolitesID {
//////////// ////////////
// JSONFilesUtils.writeJSONTriageReactions(newData);
// Map<String, Set<TcNumberContainer2>> newData2 = new HashMap<>(); // Map<String, Set<TcNumberContainer2>> newData2 = new HashMap<>();
// //
// newData2.put("P29897", newData.get("P29897")); // newData2.put("P29897", newData.get("P29897"));
...@@ -183,7 +183,7 @@ public class WriteByMetabolitesID { ...@@ -183,7 +183,7 @@ public class WriteByMetabolitesID {
// newData2.put("P54862", newData.get("P54862")); // newData2.put("P54862", newData.get("P54862"));
// newData2.put("P27243", newData.get("P27243")); // newData2.put("P27243", newData.get("P27243"));
new PopulateTriageNeo4jDatabase(newData); // new PopulateTriageNeo4jDatabase(newData);
/////TRIAGE /////TRIAGE
......
package reactions; package reactions;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
...@@ -18,7 +17,6 @@ import blast.Blast; ...@@ -18,7 +17,6 @@ import blast.Blast;
import enumerators.TypeOfTransporter; import enumerators.TypeOfTransporter;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import javafx.util.Pair; import javafx.util.Pair;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule; import pt.uminho.ceb.biosystems.merlin.utilities.containers.capsules.AlignmentCapsule;
import tcdb.capsules.GeneContainer; import tcdb.capsules.GeneContainer;
import tcdb.capsules.ReactionContainer; import tcdb.capsules.ReactionContainer;
...@@ -27,9 +25,6 @@ import triageDatabase.TriageGraphDatabaseService; ...@@ -27,9 +25,6 @@ import triageDatabase.TriageGraphDatabaseService;
import triageDatabase.TriageNeo4jInitializer; import triageDatabase.TriageNeo4jInitializer;
import triageDatabase.TriageNodeLabel; import triageDatabase.TriageNodeLabel;
import triageDatabase.TriageRelationshipType; import triageDatabase.TriageRelationshipType;
import uk.ac.ebi.kraken.interfaces.uniprot.NcbiTaxon;
import uk.ac.ebi.kraken.interfaces.uniprot.Organism;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
public class ProvideTransportReactionsToGenes { public class ProvideTransportReactionsToGenes {
...@@ -110,13 +105,13 @@ public class ProvideTransportReactionsToGenes { ...@@ -110,13 +105,13 @@ public class ProvideTransportReactionsToGenes {
private void start() { private void start() {
//getTaxonomies(); implementar a parte de ir buscar a taxonomia e o organismo base de dados neo4j getTaxonomies();
buildReactionContainersForBlastResults(); buildReactionContainersForBlastResults();
logger.debug("Gene containers creation initiated."); logger.debug("Gene containers creation initiated.");
Map<String, GeneContainer> data = processHomologousGenesInfo(); Map<String, GeneContainer> data = buildGenesContainers();
logger.debug("All gene containers created."); logger.debug("All gene containers created.");
...@@ -146,7 +141,6 @@ public class ProvideTransportReactionsToGenes { ...@@ -146,7 +141,6 @@ public class ProvideTransportReactionsToGenes {
organisms.put(accession, node.getProperty(TriageGeneralProperties.Organism.toString()).toString()); organisms.put(accession, node.getProperty(TriageGeneralProperties.Organism.toString()).toString());
} }
} }
/** /**
...@@ -198,71 +192,6 @@ public class ProvideTransportReactionsToGenes { ...@@ -198,71 +192,6 @@ public class ProvideTransportReactionsToGenes {
} }
/**
 * Looks up taxonomy and organism for every homologous gene and builds the gene containers.
 * <p>
 * The distinct accessions found in the values of {@code homologousGenes} are sent to
 * {@code UniProtAPI.getEntriesFromUniProtIDs}. For each returned entry the taxonomy and
 * organism are stored under the entry's primary accession, and both maps are then handed
 * to {@code buildGenesContainers}.
 *
 * @return the result of {@code buildGenesContainers}, or {@code null} when UniProt returns
 *         no entries list or any unexpected error occurs
 */
private Map<String, GeneContainer> processHomologousGenesInfo() {

	try {

		Map<String, List<NcbiTaxon>> taxonomies = new HashMap<>();
		Map<String, Organism> organisms = new HashMap<>();

		// Distinct accessions, kept in first-seen order.
		List<String> toSearch = new ArrayList<>();

		for(String key : homologousGenes.keySet()) {

			for(String gene : homologousGenes.get(key)) {

				if(!toSearch.contains(gene))
					toSearch.add(gene);
			}
		}

		////// Uniprot taxonomy identification

		logger.debug("Taxonomy records to search with UniProAPI: {}", toSearch.size());

		List<UniProtEntry> uniprotList = UniProtAPI.getEntriesFromUniProtIDs(toSearch, 0);

		// Previously a null list surfaced as a NullPointerException caught by the outer
		// handler; the outcome (null result) is the same, but the cause is now explicit.
		if(uniprotList == null) {
			logger.error("UniProt returned no entries list for the homologous genes!!!");
			return null;
		}

		for(UniProtEntry uniprot : uniprotList) {

			// Null entries are skipped explicitly instead of through a swallowed exception.
			if(uniprot == null)
				continue;

			try {
				String uniprotID = uniprot.getPrimaryUniProtAccession().getValue();

				taxonomies.put(uniprotID, uniprot.getTaxonomy());
				organisms.put(uniprotID, uniprot.getOrganism());
			}
			catch (Exception e) {
				// Best effort: one unreadable entry must not abort the whole batch,
				// but the failure is no longer silent.
				logger.warn("Skipping a UniProt entry whose accession, taxonomy or organism could not be read.", e);
			}

			// NOTE (translated from the original author's comment): revisit this because
			// the same accession may have two TC numbers.
		}

		return buildGenesContainers(organisms, taxonomies);
	}
	catch (Exception e) {
		// The logger records the stack trace; printStackTrace() bypassed the logging setup.
		logger.error("An error occurred while processing the homologous genes!!!", e);
	}

	return null;
}
/** /**
* Build containers and count common taxa. * Build containers and count common taxa.
* *
...@@ -272,7 +201,7 @@ public class ProvideTransportReactionsToGenes { ...@@ -272,7 +201,7 @@ public class ProvideTransportReactionsToGenes {
* @param homologousGenes * @param homologousGenes
* @return * @return
*/ */
private Map<String, GeneContainer> buildGenesContainers(Map<String, Organism> organisms, Map<String, List<NcbiTaxon>> taxonomies) { private Map<String, GeneContainer> buildGenesContainers() {
Map<String, GeneContainer> genes = new HashMap<>(); Map<String, GeneContainer> genes = new HashMap<>();
...@@ -298,15 +227,15 @@ public class ProvideTransportReactionsToGenes { ...@@ -298,15 +227,15 @@ public class ProvideTransportReactionsToGenes {
for(int i = 0; i < taxonomy.length; i++) { for(int i = 0; i < taxonomy.length; i++) {
List<NcbiTaxon> hTaxonomy = taxonomies.get(homologue); String[] hTaxonomy = taxonomies.get(homologue);
if(taxonomy[i].trim().equalsIgnoreCase(hTaxonomy.get(i).toString())) if(taxonomy[i].trim().equalsIgnoreCase(hTaxonomy[i]))
count++; count++;
else else
break; break;
} }
if(organism.equals(organisms.get(homologue).getScientificName().toString())) if(organism.equals(organisms.get(homologue)))
count++; count++;
} }
else else
......
...@@ -218,7 +218,7 @@ public class TransportReactionsBuilder { ...@@ -218,7 +218,7 @@ public class TransportReactionsBuilder {
// System.out.println(distributions); // System.out.println(distributions);
newTcContainer = generateAllPossibleReactions(distributions, reverseKeys, childNames, metabolites, formulas, reactionContainer, newTcContainer); newTcContainer = generateAllPossibleReactions(distributions, reverseKeys, childNames, metabolites, formulas, reactionContainer, newTcContainer, tcContainer.getTcNumber());
for(int i : newTcContainer.getAllReactionsIds()) { for(int i : newTcContainer.getAllReactionsIds()) {
uniqueReactions.add(newTcContainer.getReactionContainer(i).getReactionID()); uniqueReactions.add(newTcContainer.getReactionContainer(i).getReactionID());
...@@ -260,7 +260,7 @@ public class TransportReactionsBuilder { ...@@ -260,7 +260,7 @@ public class TransportReactionsBuilder {
* @return * @return
*/ */
private TcNumberContainer2 generateAllPossibleReactions(List<Set<String>> distributions, Map<String, String> reverseKeys, Map<String, String> childNames, private TcNumberContainer2 generateAllPossibleReactions(List<Set<String>> distributions, Map<String, String> reverseKeys, Map<String, String> childNames,
Map<String, String> metabolites, Map<String, String> formulas, ReactionContainer reactionContainer, TcNumberContainer2 newTcContainer) { Map<String, String> metabolites, Map<String, String> formulas, ReactionContainer reactionContainer, TcNumberContainer2 newTcContainer, String tcNumber) {
for(Set<String> distribution : distributions) { for(Set<String> distribution : distributions) {
...@@ -291,6 +291,16 @@ public class TransportReactionsBuilder { ...@@ -291,6 +291,16 @@ public class TransportReactionsBuilder {
String reactantIDs = sortReactantsAndProducts(reactionIDsSplit[0]); String reactantIDs = sortReactantsAndProducts(reactionIDsSplit[0]);
String productIDs = sortReactantsAndProducts(reactionIDsSplit[1]); String productIDs = sortReactantsAndProducts(reactionIDsSplit[1]);
if(tcNumber.equals("4.C.1.1.4")) {
System.out.println(reaction);
System.out.println(idsReaction);
System.out.println(formulasReaction);
System.out.println();
}
if(formulasReaction != null) { if(formulasReaction != null) {
......
...@@ -13,15 +13,19 @@ import org.neo4j.graphdb.Transaction; ...@@ -13,15 +13,19 @@ import org.neo4j.graphdb.Transaction;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import files.FilesUtils;
import files.ReadExcelFile;
import internalDB.WriteByMetabolitesID; import internalDB.WriteByMetabolitesID;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import tcdb.capsules.ReactionContainer; import tcdb.capsules.ReactionContainer;
import tcdb.capsules.TcNumberContainer2; import tcdb.capsules.TcNumberContainer2;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry; import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
import utilities.UniprotAPIExtension;
public class PopulateTriageNeo4jDatabase { public class PopulateTriageNeo4jDatabase {
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class); private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
private static final String TAXONOMIES_PATH = "C:\\Users\\Davide\\Documents\\reactionsBuilderTriage\\taxonomies.txt";
private static final String ORGANISMS_PATH = "C:\\Users\\Davide\\Documents\\reactionsBuilderTriage\\organisms.txt";
private TriageGraphDatabaseService service; private TriageGraphDatabaseService service;
private Map<String, Set<TcNumberContainer2>> data; private Map<String, Set<TcNumberContainer2>> data;
...@@ -80,7 +84,7 @@ public class PopulateTriageNeo4jDatabase { ...@@ -80,7 +84,7 @@ public class PopulateTriageNeo4jDatabase {
organism = organisms.get(accession); organism = organisms.get(accession);
} }
accessionNode.setProperty(TriageGeneralProperties.Taxonomy.toString(), taxonomy); //implementar accessionNode.setProperty(TriageGeneralProperties.Taxonomy.toString(), taxonomy);
accessionNode.setProperty(TriageGeneralProperties.Organism.toString(), organism); accessionNode.setProperty(TriageGeneralProperties.Organism.toString(), organism);
logger.debug("Uniprot Accession node created for accession: {}", accession); logger.debug("Uniprot Accession node created for accession: {}", accession);
...@@ -144,33 +148,88 @@ public class PopulateTriageNeo4jDatabase { ...@@ -144,33 +148,88 @@ public class PopulateTriageNeo4jDatabase {
private void findTaxonomy(){ private void findTaxonomy(){
taxonomies = new HashMap<>(); taxonomies = FilesUtils.readMapFromFile(TAXONOMIES_PATH);
organisms = new HashMap<>(); organisms = FilesUtils.readMapFromFile(ORGANISMS_PATH);
int taxInitialSize = taxonomies.size();
int orgInitialSize = organisms.size();
List<String> toSearch = new ArrayList<>(data.keySet()); List<String> toSearch = new ArrayList<>(data.keySet());
List<UniProtEntry> uniprotList = UniProtAPI.getEntriesFromUniProtIDs(toSearch, 0); toSearch.replaceAll(String::toUpperCase);
if(uniprotList != null) { for(String acc : new ArrayList<>(toSearch)) {
for(UniProtEntry uniprot : uniprotList) { if(taxonomies.containsKey(acc) && organisms.containsKey(acc))
toSearch.remove(acc);
}
Map<String, String> backupData = getOrganismsFromTCDB();
if(!toSearch.isEmpty()) {
List<UniProtEntry> uniprotList = UniprotAPIExtension.getEntriesFromUniProtIDs(toSearch, 0);
try { if(uniprotList != null) {
String uniprotID = uniprot.getPrimaryUniProtAccession().getValue(); for(UniProtEntry uniprot : uniprotList) {
taxonomies.put(uniprotID, uniprot.getTaxonomy().toString()); if(uniprot != null) {
organisms.put(uniprotID, uniprot.getOrganism().toString()); String uniprotID = uniprot.getPrimaryUniProtAccession().getValue();
} taxonomies.put(uniprotID, uniprot.getTaxonomy().toString());
catch (Exception e) { organisms.put(uniprotID, uniprot.getOrganism().toString());
// System.out.println(uniprot.getSequence());
// e.printStackTrace(); toSearch.remove(uniprotID);
}
} }
}
// tcdbGenes.put(uniprotID, new TcdbGene(data.getSequence(), data.getTcNumber(), null, uniprot.getOrganism(), uniprot.getTaxonomy(), false, null)); //rever isto por causa da situao de o mesmo Acc ter dois TCs if(!toSearch.isEmpty()) {
for(String uniprotID : toSearch) {
if(backupData.containsKey(uniprotID)) {
String org = backupData.get(uniprotID);
String tax = UniprotAPIExtension.findTaxonmyByOrganismName(org, 0);
if(tax != null) {
taxonomies.put(uniprotID.replaceAll("\\[", "").replaceAll("\\]", ""), tax);
organisms.put(uniprotID, org);
}
}
}
}
if(taxonomies.size() != taxInitialSize || organisms.size() != orgInitialSize) {
FilesUtils.saveMapInFile(TAXONOMIES_PATH, taxonomies);
FilesUtils.saveMapInFile(ORGANISMS_PATH, organisms);
} }
} }
} }
/**
 * Reads the results spreadsheet and builds an accession-to-organism map.
 * <p>
 * For each row, the key is the text of the first cell up to (not including) the first
 * {@code '@'}, and the value is the second cell. Only the first non-empty organism seen
 * for an accession is kept. Rows that are null, have fewer than two cells, or contain a
 * null in either of the first two cells are skipped.
 *
 * @return the accession-to-organism map; empty when the spreadsheet yields no data
 */
private static Map<String, String> getOrganismsFromTCDB() {

	List<String[]> excel = ReadExcelFile.getData("C:\\Users\\Davide\\OneDrive - Universidade do Minho\\UMinho\\Tese\\Internal database\\results.xlsx");

	Map<String, String> data = new HashMap<>();

	if(excel == null)
		return data;

	for(String[] line : excel) {

		// Incomplete rows would otherwise raise ArrayIndexOutOfBounds / NullPointer exceptions.
		if(line == null || line.length < 2 || line[0] == null || line[1] == null)
			continue;

		// indexOf/substring instead of split("@")[0]: split returns an empty array for a
		// cell that consists only of '@', which made the [0] access throw.
		int separator = line[0].indexOf('@');
		String accession = separator < 0 ? line[0] : line[0].substring(0, separator);
		String organism = line[1];

		if(!organism.isEmpty())
			data.putIfAbsent(accession, organism);
	}

	return data;
}
} }
package utilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.utilities.MySleep;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
import uk.ac.ebi.uniprot.dataservice.client.QueryResult;
import uk.ac.ebi.uniprot.dataservice.client.uniprot.UniProtQueryBuilder;
import uk.ac.ebi.uniprot.dataservice.query.Query;
/**
 * Extension of {@code UniProtAPI} that adds a taxonomy lookup by organism name.
 */
public class UniprotAPIExtension extends UniProtAPI{

	final static Logger logger = LoggerFactory.getLogger(UniprotAPIExtension.class);

	/**
	 * Finds a taxonomy for the given organism name.
	 * <p>
	 * Queries UniProt for entries matching {@code name} and returns the string form of the
	 * first non-null, non-empty taxonomy found. When the lookup throws, it waits one second
	 * and retries, incrementing {@code errorCount}, until {@code errorCount} reaches 5.
	 *
	 * @param name organism name to search for; a null or blank name yields {@code null}
	 * @param errorCount number of failed attempts so far; callers normally pass 0
	 * @return the taxonomy as text, or {@code null} when nothing was found or every attempt failed
	 */
	public static String findTaxonmyByOrganismName(String name, int errorCount) {

		// Nothing sensible can be searched for without an organism name.
		if(name == null || name.trim().isEmpty())
			return null;

		UniProtAPI.getInstance();

		try {

			// Fix: the query was hard-coded to "treponema", so the name argument was ignored
			// and every organism received the same taxonomy.
			Query query = UniProtQueryBuilder.organismName(name);

			QueryResult<UniProtEntry> entries = uniProtService.getEntries(query);

			while (entries.hasNext()) {

				UniProtEntry entry = entries.next();

				// Fix: test for null before dereferencing; the original called toString() first.
				Object taxonomy = entry.getTaxonomy();

				if(taxonomy != null) {

					String taxonomyText = taxonomy.toString();

					if(!taxonomyText.isEmpty())
						return taxonomyText;
				}
			}
		}
		catch(Exception e) {

			if(errorCount<5) {

				MySleep.myWait(1000);
				errorCount+=1;
				logger.debug("Entries From UniProt IDs trial {}",errorCount);
				return UniprotAPIExtension.findTaxonmyByOrganismName(name, errorCount);
			}
			else {

				logger.error("Could not retrieve entries list. Returning null. {}",name);
				logger.trace("StackTrace {}",e);
				return null;
			}
		}

		return null;
	}
}
package biosynth;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.Test;
import connection.TcdbRetriever;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import tcdb.tcdbTransportTypesRetriever.FastaTcdb;
import tcdb.tcdbTransportTypesRetriever.ReadFastaTcdb;
import triageDatabase.PopulateTriageNeo4jDatabase;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
/**
 * Scratch tests used to try out the TCDB and UniProt helpers by hand.
 */
class Uniprot {

	/**
	 * Placeholder test: every statement is currently commented out, so it runs nothing.
	 */
	@Test
	void test() throws InterruptedException, IOException {

//		PopulateTriageNeo4jDatabase.getOrganismsFromTCDB();

//		TcdbRetriever.getSubstrates();

//		Map<String, FastaTcdb> fasta = ReadFastaTcdb.readfasta(true);
//
//		System.out.println(fasta.size());

//		for(String key : fasta.keySet())
//			System.out.println(key + "\t" + fasta.get(key).getOrganism());
	}

	/**
	 * Fetches one accession from UniProt and prints its taxonomy, or "NULL" for a null entry.
	 * Not run as a test: the annotation below is commented out.
	 */
//	@Test
	void test2() {

		List<String> accessions = new ArrayList<>();

		accessions.add("Q97XW7");	//[Q97xw7, B0R9X2, P87020, O07620, Q7JZR2]

		List<UniProtEntry> entries = UniProtAPI.getEntriesFromUniProtIDs(accessions, 0);

		for(int index = 0; index < entries.size(); index++) {

			UniProtEntry current = entries.get(index);

			// Both branches print through println(Object), exactly as the if/else form did.
			Object printable = (current != null) ? current.getTaxonomy() : "NULL";

			System.out.println(printable);
		}
	}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment