GitLab CI for docker build enabled! You can enable it using .gitlab-ci.yml in your project. Check the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 240bd7e3 authored by Davide Lagoa's avatar Davide Lagoa
Browse files

APIs moved to scraper

parent 006ee099
......@@ -18,12 +18,22 @@
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-10">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/5"/>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
org.eclipse.jdt.core.compiler.codegen.targetPlatform=9
org.eclipse.jdt.core.compiler.codegen.targetPlatform=10
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=9
org.eclipse.jdt.core.compiler.compliance=10
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
......@@ -11,4 +11,4 @@ org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.release=enabled
org.eclipse.jdt.core.compiler.source=9
org.eclipse.jdt.core.compiler.source=10
......@@ -271,13 +271,6 @@
<version>1.2.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/uk.ac.ebi.chebi.webapps.chebiWS.client/chebiWS-client -->
<dependency>
<groupId>uk.ac.ebi.chebi.webapps.chebiWS.client</groupId>
<artifactId>chebiWS-client</artifactId>
<version>2.4</version>
</dependency>
</dependencies>
<name>transyt-service</name>
</project>
package APIs;
import java.util.List;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.chebi.ChebiAPIInterface;
import uk.ac.ebi.chebi.webapps.chebiWS.client.ChebiWebServiceClient;
import uk.ac.ebi.chebi.webapps.chebiWS.model.DataItem;
import uk.ac.ebi.chebi.webapps.chebiWS.model.Entity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntity;
import uk.ac.ebi.chebi.webapps.chebiWS.model.LiteEntityList;
import uk.ac.ebi.chebi.webapps.chebiWS.model.SearchCategory;
import uk.ac.ebi.chebi.webapps.chebiWS.model.StarsCategory;
/**
 * Static helper built on the ChEBI web service client for resolving
 * cross references between ChEBI and other databases.
 */
public class ChebiAPI extends ChebiAPIInterface{

    // Single client created when the class is loaded and reused by every call.
    // NOTE(review): assumes ChebiWebServiceClient can be reused across calls;
    // confirm its thread-safety if this method is ever invoked concurrently.
    private static final ChebiWebServiceClient chebiClient = new ChebiWebServiceClient();

    /**
     * Looks up the MetaCyc accession associated with an external reference.
     * <p>
     * The reference is searched among the manually curated cross references of
     * three-star ChEBI entries (at most one hit is requested). The database links
     * of the matching ChEBI entity are then scanned for a link whose type is
     * "MetaCyc accession".
     *
     * @param id external identifier to search for; {@code null} yields {@code null}
     * @return the MetaCyc accession, or {@code null} when no ChEBI entity or no
     *         MetaCyc link is found, or when the web service call fails
     */
    public static String getMetacycIDUsingExternalReference(String id) {

        if(id == null)
            return null;

        try {
            LiteEntityList entities = chebiClient.getLiteEntity(id, SearchCategory.MANUAL_XREFS, 1, StarsCategory.THREE_ONLY);

            if(entities == null)
                return null;

            // Only one result is requested, so this keeps the single hit (the last one if several are returned).
            String chebiID = null;

            for(LiteEntity liteEntity : entities.getListElement())
                chebiID = liteEntity.getChebiId();

            if(chebiID == null || chebiID.isEmpty())
                return null;

            Entity entity = chebiClient.getCompleteEntity(chebiID);

            if(entity == null)
                return null;

            // Scan the database links of the entity for the MetaCyc cross reference.
            for(DataItem dataItem : entity.getDatabaseLinks()) {

                if(dataItem.getType().trim().equalsIgnoreCase("MetaCyc accession"))
                    return dataItem.getData();
            }
        }
        catch (Exception e1) {
            // Best effort: a failed lookup is reported and treated as "not found".
            e1.printStackTrace();
        }

        return null;
    }
}
package APIs;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.TaxonomyContainer;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.ebi.uniprot.UniProtAPI;
import pt.uminho.ceb.biosystems.merlin.bioapis.externalAPI.utilities.MySleep;
import uk.ac.ebi.kraken.interfaces.uniprot.NcbiTaxonomyId;
import uk.ac.ebi.kraken.interfaces.uniprot.UniProtEntry;
import uk.ac.ebi.uniprot.dataservice.client.QueryResult;
import uk.ac.ebi.uniprot.dataservice.client.exception.ServiceException;
import uk.ac.ebi.uniprot.dataservice.client.uniprot.UniProtQueryBuilder;
import uk.ac.ebi.uniprot.dataservice.query.Query;
/**
 * UniProt lookups layered on top of {@link UniProtAPI}. Both lookups retry a
 * failed remote call after a one second pause, up to a fixed number of attempts.
 */
public class UniprotAPIExtension extends UniProtAPI{

    final static Logger logger = LoggerFactory.getLogger(UniprotAPIExtension.class);

    /**
     * Finds the taxonomy of the first UniProt entry returned for an organism name.
     *
     * @param name organism name used to build the UniProt query
     * @param errorCount number of failed attempts so far; the method retries while
     *        this is below 5
     * @return the string form of the first non-empty taxonomy found, or {@code null}
     *         when no entry carries a taxonomy or the retries are exhausted
     */
    public static String findTaxonmyByOrganismName(String name, int errorCount) {

        // NOTE(review): presumably initialises the inherited uniProtService - confirm in UniProtAPI.
        UniProtAPI.getInstance();

        try {
            // Query the organism that was asked for (previously a hard-coded test organism).
            Query query = UniProtQueryBuilder.organismName(name);

            QueryResult<UniProtEntry> entries = uniProtService.getEntries(query);

            while (entries.hasNext()) {

                UniProtEntry entry = entries.next();

                // The null check must come first, otherwise it can never protect the dereference.
                if(entry.getTaxonomy() != null && !entry.getTaxonomy().toString().isEmpty())
                    return entry.getTaxonomy().toString();
            }
        }
        catch(Exception e) {

            if(errorCount<5) {

                MySleep.myWait(1000);
                errorCount+=1;
                logger.debug("Entries From UniProt IDs trial {}",errorCount);
                return UniprotAPIExtension.findTaxonmyByOrganismName(name, errorCount);
            }
            else {
                logger.error("Could not retrieve entries list. Returning null. {}",name);
                logger.trace("StackTrace {}",e);
                return null;
            }
        }

        return null;
    }

    /**
     * Retrieves the species name and taxonomy for an NCBI taxonomy identifier.
     * <p>
     * The entries returned for the taxon query are scanned, and the first entry
     * that lists exactly this identifier among its NCBI taxonomy ids is used.
     *
     * @param taxID NCBI taxonomy identifier
     * @param errorCount number of failed attempts so far; the method retries while
     *        this is below 10
     * @return a container with the species name and taxonomy, or {@code null} when
     *         no entry matches or the retries are exhausted
     */
    public static TaxonomyContainer getTaxonomyFromNCBITaxnomyID(int taxID, int errorCount) {

        // NOTE(review): presumably initialises the inherited uniProtService - confirm in UniProtAPI.
        UniProtAPI.getInstance();

        TaxonomyContainer result = new TaxonomyContainer();

        try {
            Query query = UniProtQueryBuilder.taxonID(taxID);    //UniprotAPI uses UniProtQueryBuilder.gene() -> wrong

            QueryResult<UniProtEntry> entries = uniProtService.getEntries(query);

            String wantedTaxon = String.valueOf(taxID);

            while(entries.hasNext()) {

                UniProtEntry uniProtEntry = entries.next();

                List<NcbiTaxonomyId> taxa = uniProtEntry.getNcbiTaxonomyIds();

                for(NcbiTaxonomyId taxon : taxa) {

                    if(taxon.getValue().equalsIgnoreCase(wantedTaxon)) {

                        result.setSpeciesName(uniProtEntry.getOrganism().getScientificName().getValue());
                        result.setTaxonomy(uniProtEntry.getTaxonomy());

                        return result;
                    }
                }
            }

            return null;
        }
        catch(ServiceException e) {

            if(errorCount<10) {

                MySleep.myWait(1000);

                // Count each failure exactly once; the counter used to be incremented
                // here and again in the recursive call, halving the number of retries.
                errorCount = errorCount+1;
                logger.debug("getTaxonomyFromNCBITaxnomyID trial {}",errorCount);
                return getTaxonomyFromNCBITaxnomyID(taxID, errorCount);
            }
            else {
                logger.error("getTaxonomyFromNCBITaxnomyID eror, returning null. {}",taxID);
                logger.trace("StackTrace {}",e);
                return null;
            }
        }
    }
}
......@@ -52,6 +52,7 @@ import pt.uminho.ceb.biosystems.mew.biocomponents.container.io.readers.JSBMLRead
import pt.uminho.ceb.biosystems.mew.biocomponents.validation.io.JSBMLValidationException;
import relations.GPRAssociations;
import tcdb.capsules.GeneContainer;
import tcdb.capsules.Organism;
import tcdb.capsules.ReactionContainer;
import tcdb.capsules.Subunits;
import triageDatabase.TriageGeneralProperties;
......@@ -83,7 +84,6 @@ public class ProvideTransportReactionsToGenes {
private Map<String, Map<String, Set<String>>> finalResults;
private Map<String, ReactionContainer> reactionContainersByID;
private Properties properties;
private Integer taxID;
private Integer queryFileTotalOfGenes;
private String jobIdentification;
private Set<String> reactionsToIgnore;
......@@ -96,7 +96,7 @@ public class ProvideTransportReactionsToGenes {
private static final Logger logger = LoggerFactory.getLogger(WriteByMetabolitesID.class);
public ProvideTransportReactionsToGenes(Integer taxID, String queryPath, Map<String, String> modelMetabolites, Properties properties) {
public ProvideTransportReactionsToGenes(Organism organismProperties, String queryPath, Map<String, String> modelMetabolites, Properties properties) {
logger.info("TRIAGE initialized!");
......@@ -107,18 +107,20 @@ public class ProvideTransportReactionsToGenes {
tcNumbersNotPresentInTRIAGEdatabase = new HashSet<>();
reactionContainersByID = new HashMap<>();
this.taxonomy = organismProperties.getTaxonomy();
this.organism = organismProperties.getOrganism();
this.modelMetabolites = modelMetabolites;
this.properties = properties;
this.taxID = taxID;
this.jobIdentification = new File(queryPath).getName().replace(".faa", "");
subunits = new Subunits();
defaultLabel = getDefaultLabel();
findTaxonomyByTaxonomyID(taxID);
// findTaxonomyByTaxonomyID(taxID);
try {
Blast blast = new Blast(false, queryPath, properties);
Blast blast = new Blast(true, queryPath, properties);
blastResults = blast.getAlignmentsByQuery();
queryFileTotalOfGenes = blast.getQueryFileSize();
......@@ -131,30 +133,28 @@ public class ProvideTransportReactionsToGenes {
Transaction dataTx = graphDatabaseService.beginTx();
//////////////////////////
@SuppressWarnings("resource")
Scanner reader = new Scanner(System.in);
int n = 1;
while (n != 99) {
// tests();
start(); //uncomment
System.out.println("Insert a number to repeat or 99 to finish");
// @SuppressWarnings("resource")
// Scanner reader = new Scanner(System.in);
//
// int n = 1;
//
//
// while (n != 99) {
try {
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
start();
}
// System.out.println("Insert a number to repeat or 99 to finish");
//
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
/////////////////////////
dataTx.close();
service.shutdown();
......@@ -169,117 +169,6 @@ public class ProvideTransportReactionsToGenes {
}
}
/**
 * Ad-hoc debugging aid: prints the TC number node stored for a hard-coded TC
 * number, followed by the properties of every reaction node linked to it
 * through a {@code has_reaction} relationship.
 */
private void tests() {

    String tc = "4.A.6.1.1";
    // Alternative TC number used during debugging: "3.A.1.9.1"

    Node tcNumberNode = service.findTcNumberNode(tc);

    System.out.println(tc);
    System.out.println("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");

    // Nothing else to show when the TC number is not in the database.
    if(tcNumberNode == null) {
        System.out.println("nulo");
        return;
    }

    System.out.println(tcNumberNode.getAllProperties());

    for(Relationship reactionLink : tcNumberNode.getRelationships(TriageRelationshipType.has_reaction)) {

        Node reactionNode = reactionLink.getEndNode();

        System.out.println(reactionNode.getAllProperties());
        System.out.println(reactionNode.getProperty("ReactionID") + "\t********\t" + reactionNode.getProperty("Reaction"));

        // The ModelSEED form of the reaction is optional on the node.
        String modelSeedKey = TriageGeneralProperties.ReactionModelSEED.toString();

        if(reactionNode.hasProperty(modelSeedKey))
            System.out.println(reactionNode.getProperty("ReactionID") + "\t********\t" + reactionNode.getProperty(modelSeedKey));

        System.out.println();
    }
}
/**
 * Fills the {@code taxonomy} and {@code organism} fields for a taxonomy identifier.
 * <p>
 * The taxonomy and organism files configured in {@code properties} are consulted
 * first. When either file lacks the identifier, UniProt is queried and a
 * successful answer is written back to both files. When UniProt does not know
 * the identifier either, the problem is logged and the JVM is terminated.
 *
 * @param taxID taxonomy identifier to resolve
 */
private void findTaxonomyByTaxonomyID(Integer taxID) {

    Map<Integer, String> taxonomyFile = FilesUtils.readMapFromFile3(properties.getTaxonomiesFilePath());
    Map<Integer, String> organismFile = FilesUtils.readMapFromFile3(properties.getOrganismsFilePath());

    boolean knownLocally = taxonomyFile.containsKey(taxID) && organismFile.containsKey(taxID);

    if(!knownLocally) {

        TaxonomyContainer uniprot = UniprotAPIExtension.getTaxonomyFromNCBITaxnomyID(taxID, 0);

        if(uniprot == null) {

            // TODO: implement a method that asks the user to enter the taxonomy manually
            logger.error("Taxonomy ID not recognized by UniProt! Please insert a valid TaxonomyID!");
            logger.info("SHUTING DOWN TRIAGE...");

            System.exit(0);
        }
        else {

            organism = uniprot.getSpeciesName();

            // Strip the list brackets from the string form before splitting it into ranks.
            String lineage = uniprot.getTaxonomy().toString();
            taxonomy = lineage.replaceAll("\\[", "").replaceAll("\\]", "").split(", ");

            // Remember the answer so the next run does not need to query UniProt.
            taxonomyFile.put(taxID, Arrays.toString(taxonomy));
            organismFile.put(taxID, organism);

            FilesUtils.saveMapInFile2(properties.getTaxonomiesFilePath(), taxonomyFile);
            FilesUtils.saveMapInFile2(properties.getOrganismsFilePath(), organismFile);
        }
    }
    else {

        String storedLineage = taxonomyFile.get(taxID);
        taxonomy = storedLineage.replaceAll("\\[", "").replaceAll("\\]", "").split(", ");
        organism = organismFile.get(taxID);
    }

    logger.info("Taxonomy: {}", Arrays.toString(taxonomy));
    logger.info("Organism: {}", organism);
}
private void start() {
try {
......@@ -315,60 +204,16 @@ public class ProvideTransportReactionsToGenes {
logger.debug("Reactions search complete!");
Scanner reader = new Scanner(System.in);
int n = 1;
//
// while (n != 1001) {
Map<String, Map<String, String>> proteinComplexes = GPRAssociations.getGPR(service.findSubunitsInDatabase(tcNumbers), blastResults, genesContainers);
System.out.println("Insert a number to repeat or 1001 to finish");
// try {
// n = reader.nextInt();
// } catch (Exception e) {
// e.printStackTrace();
//
// n = reader.nextInt();
// }
//
// }
// reader.close();
// Map<String, String> reactionsIDS = null;
// Map<String, String> reactionsIDS = Reports.generateKBaseReport(jobIdentification, organism, taxID, queryFileTotalOfGenes, properties, finalResults, service);
//
// System.out.println("1");
//
// OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
//
// System.out.println("2");
//
// Container container = new Container(output);
// container.verifyDepBetweenClass();
// int n = 1;
//
while (n != 1001) {
try {
OutputMerlinFormat output = new OutputMerlinFormat(finalResults, modelMetabolites, reactionContainersByID);
// GPRAssociations.buildGeneRules(service, container, proteinComplexes, reactionContainersByID);
n = reader.nextInt();
} catch (Exception e) {
e.printStackTrace();
n = reader.nextInt();
}
}
// }
Container container = new Container(output);
container.verifyDepBetweenClass();
GPRAssociations.buildGeneRules(service, container, proteinComplexes, reactionContainersByID);
//
// System.out.println("3");
......
......@@ -30,12 +30,8 @@ public class GPRAssociations {
public static Map<String, Map<String, String>> getGPR(Map<String, Set<String>> complexesTCDB, Map<String, List<AlignmentCapsule>> blastResults, Map<String, GeneContainer> genesContainers) {
System.out.println("comeou");
Map<String, List<AlignmentCapsule>> filteredBlastResults = filterResults(blastResults, genesContainers);
System.out.println(filteredBlastResults.size());
Map<String, Map<String, Map<String, Double>>> resultsByTCnumber = new HashMap<>();
for(String gene : filteredBlastResults.keySet()) {
......@@ -77,12 +73,8 @@ public class GPRAssociations {
Map<String, String> assigned = new HashMap<>();
Map<String, Set<String>> invertedMapping = new HashMap<>();
System.out.println(tcNumber);
for(String acc : resultsByTCnumber.get(tcNumber).keySet()) {
System.out.println(resultsByTCnumber.get(tcNumber));
// else if(resultsByTCnumber.get(tcNumber).get(acc).size() == 1) {
//
......@@ -100,8 +92,6 @@ public class GPRAssociations {
}
System.out.println(invertedMapping);
if(!resultsByTCnumber.isEmpty())
assigned = findBestSubunits(resultsByTCnumber.get(tcNumber), assigned, invertedMapping);
......@@ -109,25 +99,10 @@ public class GPRAssociations {
GPR.put(tcNumber, assigned);
System.out.println(tcNumber + "\t" + GPR.get(tcNumber));
System.out.println();
System.out.println();
System.out.println();
System.out.println();
System.out.println();
System.out.println();
System.out.println();
}
}
System.out.println("tamanho dos resultados: " + GPR.size());
for(String key : GPR.keySet())
System.out.println(key + "\t" + GPR.get(key));
System.out.println("terminou");
return GPR;
......@@ -171,11 +146,8 @@ public class GPRAssociations {
*/
private static Map<String, String> findBestSubunits(Map<String, Map<String, Double>> data, Map<String, String> assigned, Map<String, Set<String>> invertedMapping){
System.out.println();
if(assigned.size() == data.size())
return assigned;
System.out.println("1");
Set<String> allAccessionRemaining = new HashSet<>(data.keySet());
......@@ -186,60 +158,41 @@ public class GPRAssociations {
}
allAccessionRemaining.removeAll(assigned.values());
System.out.println("2");
System.out.println(allAccessionRemaining);
Pair<String, String> pair = findBestGene(data, allAccessionRemaining, invertedMapping, assigned);
if(pair == null)
return null;
System.out.println(3);