GitLab CI for Docker builds is enabled! You can turn it on by adding a .gitlab-ci.yml file to your project. See the file template at https://gitlab.bio.di.uminho.pt/snippets/5

Commit 78245c10 authored by dlagoa
Browse files

initial commit

parents
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse build path for this module. Entries flagged "maven.pomderived" carry the marker m2e uses for settings derived from the pom. -->
<classpath>
<!-- Main Java sources, compiled into target/classes. -->
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Test Java sources, compiled into target/test-classes. -->
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- JRE container pinned to the JavaSE-1.8 execution environment. -->
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Maven dependency container managed by m2e. -->
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Resources folder is registered as a source entry, but every file in it is excluded (excluding="**"). -->
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<!-- Library entry pointing at a jar under /merlin-core/lib; it is not marked maven.pomderived. -->
<!-- NOTE(review): presumably requires the merlin-core project to be present in the same workspace - confirm. -->
<classpathentry kind="lib" path="/merlin-core/lib/aibench-optflux-1.1.0.jar"/>
<!-- Default output folder for compiled classes. -->
<classpathentry kind="output" path="target/classes"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor for the merlin-automatic-annotation module. -->
<projectDescription>
<name>merlin-automatic-annotation</name>
<comment></comment>
<projects>
</projects>
<!-- Builders listed for this project: the JDT Java builder followed by the m2e Maven builder. -->
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<!-- Project natures: Java and Maven (m2e). -->
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the merlin-automatic-annotation module (packaged as a jar). -->
<modelVersion>4.0.0</modelVersion>
<groupId>pt.uminho.ceb.biosystems.merlin</groupId>
<artifactId>merlin-automatic-annotation</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>merlin-automatic-annotation</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Compile for Java 1.8 with UTF-8 encoded sources. -->
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- The build timestamp is formatted as the year only. -->
<maven.build.timestamp.format>yyyy</maven.build.timestamp.format>
</properties>
<build>
<plugins>
<!-- During the compile phase, copies target/classes and src/main/resources (unfiltered) into the sibling merlin-core project. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.7</version>
<executions>
<execution>
<!-- NOTE(review): this execution id does not match the artifact name; possibly carried over from another module - confirm. -->
<id>merlin-reconstruction</id>
<phase>compile</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<!-- <outputDirectory>../merlin-core/plugins_bin/merlin-draft-reconstruction</outputDirectory> -->
<!-- Relative path: assumes merlin-core is checked out next to this module - TODO confirm. -->
<outputDirectory>../merlin-core/resources/merlin-automatic-annotation</outputDirectory>
<resources>
<resource>
<directory>target/classes</directory>
<filtering>false</filtering>
</resource>
<resource>
<directory>src/main/resources</directory>
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<!-- During the package phase, copies the module's dependencies with the version stripped from the file names. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<inherited>false</inherited>
<configuration>
<stripVersion>true</stripVersion>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<!-- Test-scoped JUnit 3.8.1. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<!-- merlin 3.9.3, declared with the same groupId as this module. -->
<dependency>
<groupId>pt.uminho.ceb.biosystems.merlin</groupId>
<artifactId>merlin</artifactId>
<version>3.9.3</version>
</dependency>
</dependencies>
</project>
package pt.uminho.ceb.biosystems.merlin.merlin_automatic_annotation;
import java.io.FileOutputStream;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import es.uvigo.ei.aibench.core.operation.annotation.Cancel;
import es.uvigo.ei.aibench.core.operation.annotation.Direction;
import es.uvigo.ei.aibench.core.operation.annotation.Operation;
import es.uvigo.ei.aibench.core.operation.annotation.Port;
import es.uvigo.ei.aibench.core.operation.annotation.Progress;
import es.uvigo.ei.aibench.workbench.Workbench;
import pt.uminho.ceb.biosystems.merlin.core.datatypes.DataTable;
import pt.uminho.ceb.biosystems.merlin.core.datatypes.GenericDataTable;
import pt.uminho.ceb.biosystems.merlin.core.datatypes.annotation.EnzymesAnnotationDataInterface;
import pt.uminho.ceb.biosystems.merlin.core.gui.CustomGUI;
import pt.uminho.ceb.biosystems.merlin.core.utilities.MerlinUtils;
import pt.uminho.ceb.biosystems.merlin.database.connector.databaseAPI.HomologyAPI;
import pt.uminho.ceb.biosystems.merlin.database.connector.datatypes.Connection;
import pt.uminho.ceb.biosystems.merlin.utilities.TimeLeftProgress;
import pt.uminho.ceb.biosystems.merlin.utilities.io.FileUtils;
@Operation(name="enzymes automatic annotation", description="enzymes automatic annotation")
public class EnzymesAutomaticAnnotation {
// Confidence-level note stored for a gene when no pipeline step matches and the default annotation is accepted.
private static final String ACCEPT_DEFAULT_NOTE = "default annotation";
// Confidence-level note stored for a gene when no pipeline step matches and the default annotation is not accepted.
private static final String REJECT_MESSAGE = "rejected";
// Confidence-level note stored for a hit key that has no row in the container's reverse-key map.
private static final String ERROR_MESSAGE = "an error occurred while evaluating";
// Taxa-type values compared (ignoring case) against the entries of inputColumn1.
private static final String SPECIES = "species";
private static final String GENUS = "genus";
// Indexed by pipeline step: step k assigns listConfidenceLevel[k], so only nine steps have a level available.
private String[] listConfidenceLevel = {"A", "B", "C", "D", "E", "F", "G", "H", "I"}; //confidence levels to be assigned to the result
// The four inputColumn lists are read in parallel: index k of each list describes pipeline step k.
private List<String> inputColumn1 = new ArrayList<>(); //ComboBox species/genus
private List<String> inputColumn2 = new ArrayList<>(); //ComboBox list of species/genus available
private List<Double> inputColumn3 = new ArrayList<>(); //TextField e-value
private List<Boolean> inputColumn4 = new ArrayList<>(); //CheckBox reviewed
// Whether a gene without any matching step keeps its default annotation; unboxed when read, so it must be set before the pipeline runs.
private Boolean inputAcceptDefault;
// Value passed as the first argument to the HomologyAPI calls and to the container's getAllGenes.
private String blastDatabase;
// Source of the per-gene homologue tables, the genes table, the reverse-key map and the database connection.
private EnzymesAnnotationDataInterface homologyDataContainer;
// Result maps, all keyed by hit key (sKey); re-created at the start of applyPipelineOptions.
private Map<Integer, String> locusTag;
private Map<Integer, String> geneName;
private Map<Integer, String> ecMap;
private Map<Integer, String> confLevelMap;
// Cancellation flag polled while iterating over the hits.
private AtomicBoolean cancel = new AtomicBoolean();
// Progress tracker updated after each processed hit and while saving.
private TimeLeftProgress progress = new TimeLeftProgress();
// Column indices used to read the locus tag and the gene name from the table returned by getAllGenes.
private int locusTagColumn = 1;
private int geneNameColumn = 3;
/**
 * Input port 1: stores the value used as the {@code blastDatabase} argument
 * of the homology queries.
 * <p>
 * Despite its name, this setter only assigns the {@code blastDatabase} field.
 *
 * @param blastDatabase value kept for the later database calls
 */
@Port(direction=Direction.INPUT, name="blastDatabase", order=1)
public void setEcCurated(String blastDatabase) {

	this.blastDatabase = blastDatabase;
}
/**
 * Input port 2: stores the taxa type ("species" or "genus") of every pipeline step.
 *
 * @param inputColumn1 one taxa type per pipeline step
 */
@Port(direction=Direction.INPUT, name="inputColumn1", order=2)
public void setinputColumn1(ArrayList<String> inputColumn1) {

	this.inputColumn1 = inputColumn1;
}
/**
 * Input port 3: stores the taxon selected for every pipeline step.
 * The literal {@code "any"} is treated as a wildcard by the pipeline.
 *
 * @param inputColumn2 one taxon name per pipeline step
 */
@Port(direction=Direction.INPUT, name="inputColumn2", order=3)
public void setinputColumn2(ArrayList<String> inputColumn2) {

	this.inputColumn2 = inputColumn2;
}
/**
 * Input port 4: stores the e-value threshold of every pipeline step.
 * A homologue passes a step when the threshold is greater than or equal to its e-value.
 *
 * @param inputColumn3 one e-value threshold per pipeline step
 */
@Port(direction=Direction.INPUT, name="inputColumn3", order=4)
public void setinputColumn3(ArrayList<Double> inputColumn3) {

	this.inputColumn3 = inputColumn3;
}
/**
 * Input port 5: stores, for every pipeline step, the reviewed status a
 * homologue must have to be accepted by that step.
 *
 * @param inputColumn4 one reviewed flag per pipeline step
 */
@Port(direction=Direction.INPUT, name="inputColumn4", order=5)
public void setinputColumn4(List<Boolean> inputColumn4) {

	this.inputColumn4 = inputColumn4;
}
/**
 * Input port 6: stores whether genes that match no pipeline step keep their
 * default annotation (otherwise they are marked as rejected).
 *
 * @param inputAcceptDefault {@code true} to accept the default annotation when no step matches
 */
@Port(direction=Direction.INPUT, name="inputAcceptDefault", order=6)
public void setInputAcceptDefault(Boolean inputAcceptDefault) {

	this.inputAcceptDefault = inputAcceptDefault;
}
/**
 * Input port 7: stores the homology data container and runs the whole
 * automatic annotation workflow.
 * <p>
 * The user is asked to confirm, because previously saved annotations are
 * deleted. On confirmation the stored homology data is deleted, the hit
 * keys are fetched and the pipeline is applied to them. Any exception is
 * printed and reported through the workbench error dialog.
 * <p>
 * NOTE(review): this method reads {@code blastDatabase} and the pipeline
 * inputs, so it presumably relies on the lower-order ports having been
 * invoked first - confirm against the AIBench port ordering.
 *
 * @param homologyDataContainer container providing the database connection and annotation data
 */
@Port(direction=Direction.INPUT, name="homologyDataContainer", order=7)
public void setEnzymesAnnotationDataInterface(EnzymesAnnotationDataInterface homologyDataContainer) {

	try {

		this.homologyDataContainer = homologyDataContainer;

		Connection connection = homologyDataContainer.getConnection();

		// try-with-resources guarantees the statement is closed on every path;
		// it was previously left open.
		try (Statement statement = connection.createStatement()) {

			int continueQuestion = CustomGUI.stopQuestion("continue?", "all annotations previously saved in the database will be lost, do you wish to continue?", new String[]{"yes", "no"});

			if (continueQuestion == 0) {

				HomologyAPI.deleteHomologyData(blastDatabase, statement);

				Set<Integer> hits = getAllHits(statement);

				applyPipelineOptions(hits);
			}
		}
	}
	catch (Exception e) {

		e.printStackTrace();
		Workbench.getInstance().error("An error occurred while performing the evaluation!");
	}
}
/**
 * Retrieves the keys of all hits available for automatic annotation in the
 * configured blast database.
 *
 * @param statement statement used to run the query
 * @return the keys returned by the database, or an empty set if the query
 *         fails with an {@link SQLException} (the stack trace is printed)
 */
public Set<Integer> getAllHits(Statement statement) {

	try {

		return HomologyAPI.getSKeyForAutomaticAnnotation(blastDatabase, statement);
	}
	catch (SQLException queryFailure) {

		queryFailure.printStackTrace();
		return new HashSet<>();
	}
}
/**
 * Applies the configured annotation pipeline to every hit and, unless the
 * operation was cancelled, saves the result.
 * <p>
 * For each hit key that maps to a row of the container:
 * <ul>
 * <li>the pipeline steps are tried in order and the first homologue that
 * satisfies a step provides the EC number and the confidence level;</li>
 * <li>if no step matches, the gene is either marked with the default
 * annotation note (EC number {@code null}) or rejected (EC number
 * {@code ""}), depending on {@code inputAcceptDefault};</li>
 * <li>the gene name and locus tag are read from the genes table
 * ({@code null} values become empty strings).</li>
 * </ul>
 * Non-null hit keys without a row are recorded with the error note.
 * Progress is reported after every processed hit.
 *
 * @param hits keys of the hits to evaluate
 */
public void applyPipelineOptions(Set<Integer> hits) {

	Map<Integer, Integer> sKeyToRow = homologyDataContainer.getReverseKeys();

	long startTime = System.currentTimeMillis();

	ecMap = new HashMap<>();
	confLevelMap = new HashMap<>();
	geneName = new HashMap<>();
	locusTag = new HashMap<>();

	int p = 0;

	GenericDataTable mainTableData = homologyDataContainer.getAllGenes(blastDatabase, true);

	for (Integer sKey : hits) {

		// nothing is processed once cancellation is requested, so stop iterating
		if (this.cancel.get())
			break;

		p++;

		if (sKeyToRow.containsKey(sKey)) {

			int row = sKeyToRow.get(sKey);

			boolean matched = annotateFromPipeline(sKey, row, mainTableData);

			if (!matched) {

				if (inputAcceptDefault) {

					this.ecMap.put(sKey, null);
					this.confLevelMap.put(sKey, ACCEPT_DEFAULT_NOTE);
				}
				else {

					ecMap.put(sKey, "");
					confLevelMap.put(sKey, REJECT_MESSAGE);
				}
			}

			String name = (String) mainTableData.getValueAt(row, geneNameColumn);
			String locus = (String) mainTableData.getValueAt(row, locusTagColumn);

			geneName.put(sKey, name == null ? "" : name);
			locusTag.put(sKey, locus == null ? "" : locus);
		}
		else if (sKey != null) {

			ecMap.put(sKey, "");
			confLevelMap.put(sKey, ERROR_MESSAGE);
		}

		// cancellation may have been requested while this hit was being processed:
		// report the task as finished
		if (this.cancel.get())
			p = hits.size();

		progress.setTime(System.currentTimeMillis() - startTime, p, hits.size());
	}

	if (!this.cancel.get())
		saveResult();
}

/**
 * Tries the pipeline steps, in order, against the homologues of one gene and
 * records the EC number and confidence level of the first match.
 *
 * @param sKey key of the gene being annotated
 * @param row row of the gene in the container
 * @param mainTableData genes table, forwarded to the EC number lookup
 * @return {@code true} if a homologue satisfied one of the steps
 */
private boolean annotateFromPipeline(Integer sKey, int row, GenericDataTable mainTableData) {

	DataTable dataTable = homologyDataContainer.getDataTable(row);
	int dataSize = dataTable.getRowCount();

	// the step configuration is only read when there is at least one homologue to test
	for (int k = 0; dataSize > 0 && k < inputColumn1.size(); k++) {	// index to fetch from the pipeline lists

		// values that depend only on the step are read once per step
		String confidenceLevel = listConfidenceLevel[k];
		String taxaType = inputColumn1.get(k);
		String taxon = inputColumn2.get(k).trim();
		double maxEValue = inputColumn3.get(k);
		boolean requiredReviewed = inputColumn4.get(k);

		for (int i = 0; i < dataSize; i++) {	// each row of the annotations

			boolean reviewed = Integer.parseInt((String) dataTable.getValueAt(i, 2)) != 0;
			String organism = (String) dataTable.getValueAt(i, 3);
			double eValue = Double.parseDouble((String) dataTable.getValueAt(i, 4));
			String ecNumbers = (String) dataTable.getValueAt(i, 7);

			boolean goEvalue = maxEValue >= eValue;
			boolean goReviewed = requiredReviewed == reviewed;

			// accept the first homologue that meets all the requirements of the step
			if (goEvalue && goReviewed && matchesTaxon(taxaType, taxon, organism)) {

				ecMap.put(sKey, getEcNumber(ecNumbers, mainTableData, row));
				confLevelMap.put(sKey, confidenceLevel);
				return true;
			}
		}
	}

	return false;
}

/**
 * Checks whether an organism satisfies the taxon restriction of a pipeline step.
 * <p>
 * The taxon {@code "any"} accepts every organism. Species are compared
 * ignoring case against the full organism name; genera are compared
 * case-sensitively against the first space-separated token of the organism
 * name. A {@code null} organism never matches a specific taxon.
 *
 * @param taxaType "species" or "genus" (case-insensitive); any other value never matches
 * @param taxon taxon configured for the step, already trimmed
 * @param organism organism name of the homologue, may be {@code null}
 * @return {@code true} if the organism is accepted by the step
 */
private static boolean matchesTaxon(String taxaType, String taxon, String organism) {

	boolean species = taxaType.equalsIgnoreCase(SPECIES);
	boolean genus = taxaType.equalsIgnoreCase(GENUS);

	if (!species && !genus)
		return false;

	if (taxon.equals("any"))
		return true;

	if (organism == null)
		return false;

	if (species)
		return taxon.equalsIgnoreCase(organism);

	return taxon.equals(organism.split(" ")[0].trim());
}
public void saveResult() {
try {
long startTime = GregorianCalendar.getInstance().getTimeInMillis();
progress.setTime(0, 0, 0, "saving results");
Connection connection = homologyDataContainer.getConnection();
HomologyAPI.insertAutomaticEnzymeAnnotation(connection, locusTag, geneName, ecMap, confLevelMap);
////////////
progress.setTime(0, 0, 0, "saving report");
String path = FileUtils.getWorkspaceTaxonomyFolderPath(this.homologyDataContainer.getProject().getDatabase().getDatabaseName(), this.homologyDataContainer.getProject().getTaxonomyID());
Calendar cal = new GregorianCalendar();
// Get the components of the time
String hour24 = String.valueOf(cal.get(Calendar.HOUR_OF_DAY)); // 0..23
String min = String.valueOf(cal.get(Calendar.MINUTE)); // 0..59
String day = String.valueOf(cal.get(Calendar.DAY_OF_YEAR)); //0..365
int rowCounter = 0, columnCounter = 0;
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet(EnzymesAutomaticAnnotation.class.getSimpleName().toString());
HSSFRow excelRow = sheet.createRow(rowCounter++);
excelRow.createCell(columnCounter++).setCellValue("genes");
excelRow.createCell(columnCounter++).setCellValue("annotation");
excelRow.createCell(columnCounter++).setCellValue("score");
excelRow.createCell(columnCounter++).setCellValue("confidence level");
excelRow.createCell(columnCounter++).setCellValue("previous annotation status");
excelRow.createCell(columnCounter++).setCellValue("previous annotation");
excelRow.createCell(columnCounter++).setCellValue("score");
for(int key : ecMap.keySet()) {
columnCounter=0;
Integer row = homologyDataContainer.getReverseKeys().get(key);
String currentAnnotation = this.homologyDataContainer.getItemsList().get(1).get(row),
newAnnotation = this.ecMap.get(key);
if(newAnnotation == null && this.confLevelMap.get(key).equals(ACCEPT_DEFAULT_NOTE))
newAnnotation = currentAnnotation;
excelRow = sheet.createRow(rowCounter++);
excelRow.createCell(columnCounter++).setCellValue(this.locusTag.get(key));
excelRow.createCell(columnCounter++).setCellValue(newAnnotation);
if(newAnnotation != null)
excelRow.createCell(columnCounter++).setCellValue(homologyDataContainer.getECPercentage(newAnnotation,row));
else
excelRow.createCell(columnCounter++).setCellValue("");
excelRow.createCell(columnCounter++).setCellValue(this.confLevelMap.get(key));
String status = "distinct";
if(newAnnotation != null && newAnnotation.equalsIgnoreCase(currentAnnotation))
status = "same";
excelRow.createCell(columnCounter++).setCellValue(status);
excelRow.createCell(columnCounter++).setCellValue(currentAnnotation);
if(currentAnnotation != null)
excelRow.createCell(columnCounter++).setCellValue(homologyDataContainer.getECPercentage(currentAnnotation,row));
else
excelRow.createCell(columnCounter++).setCellValue("");
progress.setTime(GregorianCalendar.getInstance().getTimeInMillis() - startTime, rowCounter, ecMap.keySet().size(), "saving report");
}
/////////////////////////
rowCounter = 0;
columnCounter = 0;
sheet = wb.createSheet("Worflow configuration");
excelRow = sheet.createRow(rowCounter++);
excelRow.createCell(columnCounter++).setCellValue("taxa type");
excelRow.createCell(columnCounter++).setCellValue("taxon");
excelRow.createCell(columnCounter++).setCellValue("e-value");
excelRow.createCell(columnCounter++).setCellValue("UniProt status");
for(int k=0; k < inputColumn1.size(); k++) {
columnCounter=0;
excelRow = sheet.createRow(rowCounter++);
excelRow.createCell(columnCounter++).setCellValue(inputColumn1.get(k));
excelRow.createCell(columnCounter++).setCellValue(inputColumn2.get(k));
excelRow.createCell(columnCounter++).setCellValue(inputColumn3.get(k));
excelRow.createCell(columnCounter++).setCellValue(Boolean.valueOf(inputColumn4.get(k)));
}
excelRow = sheet.createRow(rowCounter++);
excelRow = sheet.createRow(rowCounter++);
columnCounter=0;
excelRow.createCell(columnCounter++).setCellValue("Accept default annotation if no match is found");
excelRow.createCell(columnCounter++).setCellValue(Boolean.valueOf(this.inputAcceptDefault));
String excelFileName = path.concat(EnzymesAutomaticAnnotation.class.getSimpleName().concat("_").concat(blastDatabase).concat(hour24).concat("_").concat(min).concat("_").concat(day).concat(".xls"));
FileOutputStream fileOut = new FileOutputStream(excelFileName);
wb.write(fileOut);
fileOut.flush();
wb.close();
fileOut.close();