Commit 7144a544 authored by iquasere's avatar iquasere
Browse files

Started the introduction of binning QC

Fixed the dockerfile
parent 4d966179
......@@ -6,5 +6,5 @@ save_non_project_files = False
[main]
version = 0.1.0
recent_files = ['D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\mosca.py', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\annotation.py', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\install.bash', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\kegg_pathway.py']
recent_files = ['D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\mosca.py', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\annotation.py', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\Dockerfile', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\install.bash', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\metaproteomics_analyser.py', 'D:\\Documentos\\Trabalho\\MOSCA_entire_project\\MOSCA_dev\\scripts\\binning.py']
......@@ -20,16 +20,17 @@ RUN buildDeps='build-essential zlib1g-dev' \
&& svn export https://github.com/timflutre/trimmomatic/trunk/adapters /MOSCA/Databases/illumina_adapters \
&& conda install megahit \
&& conda install -c bioconda spades \
&& conda install quast \
&& conda install fraggenescan \
&& conda install diamond \
# && conda install -c bioconda quast \ # TODO - introduce version control so quast can be installed through conda
&& pip install quast \
&& conda install -c bioconda fraggenescan \
&& conda install -c bioconda diamond \
&& conda install -c conda-forge progressbar33 \
&& conda install -c bioconda htseq \
&& conda install -c bioconda bowtie2 \
#&& git clone -b devel https://github.com/claczny/VizBin.git \
&& conda install -c bioconda maxbin2 \
&& conda install -c anaconda biopython \
&& conda install -c anaconda reportlab \
&& conda install -c bioconda bioconductor-deseq2 \
&& conda install -c bioconda bioconductor-genomeinfodbdata \
&& conda install bioconductor-edger \
&& conda install -c bioconda r-pheatmap \
&& conda install -c r r-rcolorbrewer \
......@@ -38,11 +39,14 @@ RUN buildDeps='build-essential zlib1g-dev' \
&& conda install -c conda-forge tqdm \
&& conda install scikit-learn \
&& conda install -c bioconda blast \
&& mkdir /MOSCA/Databases/annotation_databases \
&& mkdir -p /MOSCA/Databases/annotation_databases \
&& mkdir /input_data \
&& mkdir -p /MOSCA/Databases/COG \
&& mkdir /MOSCA/Databases/COG \
&& wget ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.tar.gz -P /MOSCA/Databases/COG \
&& tar -xzvf /MOSCA/Databases/COG/cdd.tar.gz --wildcards --no-anchored 'COG*.smp' -C /MOSCA/Databases/COG \
# && tar -xzvf /MOSCA/Databases/COG/cdd.tar.gz --wildcards --no-anchored 'COG*.smp' -C /MOSCA/Databases/COG \
&& cd MOSCA/Databases/COG \
&& tar -xzvf cdd.tar.gz --wildcards --no-anchored 'COG*.smp' \
&& cd ../../.. \
&& rm /MOSCA/Databases/COG/cdd.tar.gz \
&& wget ftp://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/cddid.tbl.gz -P /MOSCA/Databases/COG \
&& gunzip /MOSCA/Databases/COG/cddid.tbl.gz \
......
......@@ -9,7 +9,7 @@ conda config --add channels bioconda
conda config --add channels conda-forge
conda install -y -c bioconda fastqc
conda install -y -c bioconda sortmerna
#conda install -y -c anaconda svn
conda install -y -c anaconda svn
apt install -y subversion
svn export https://github.com/biocore/sortmerna/trunk/rRNA_databases MOSCA/Databases/rRNA_databases
find MOSCA/Databases/rRNA_databases/* | grep -v ".fasta" | xargs rm -fr
......@@ -20,7 +20,7 @@ conda install -y -c bioconda trimmomatic
svn export https://github.com/timflutre/trimmomatic/trunk/adapters MOSCA/Databases/illumina_adapters
conda install -y -c bioconda megahit
conda install -y -c bioconda spades
# conda install -y -c bioconda quast
# conda install -y -c bioconda quast # TODO - introduce version control so quast can be installed through conda
# pip's "install" subcommand has no -y/--yes flag (only "uninstall" does);
# passing it aborts this step with "no such option: -y".
pip install quast
conda install -y -c bioconda fraggenescan
conda install -y -c bioconda diamond
......@@ -28,16 +28,12 @@ conda install -y -c conda-forge progressbar33
conda install -y -c bioconda htseq
conda install -y -c bioconda bowtie2
conda install -y -c bioconda maxbin2
curl -L -O https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz
tar xzf checkm_data_2015_01_16.tar.gz
conda install -y -c bioconda checkm-genome
conda install -y -c anaconda biopython
conda install -y -c anaconda reportlab
#git clone -b devel https://github.com/claczny/VizBin.git
#conda install -c bioconda ant
#conda install -c cyclus java-jdk
#cd VizBin/src/interface/VizBin
#ant jar
#cd ../../../..
conda install -y -c bioconda bioconductor-deseq2=1.22.1
conda install -y -c bioconda bioconductor-deseq2 #=1.22.1
#conda install -y -c bioconda bioconductor-genomeinfodb # genomeinfodb might be required after deseq
#conda install -y -c bioconda bioconductor-genomeinfodbdata # genomeinfodbdata might be required after deseq
conda install -y -c bioconda bioconductor-edger
......@@ -55,7 +51,6 @@ wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/c
gunzip uniprot_trembl.fasta.gz uniprot_sprot.fasta.gz
cat uniprot_trembl.fasta uniprot_sprot.fasta > MOSCA/Databases/annotation_databases/uniprot.fasta
rm uniprot_trembl.fasta uniprot_sprot.fasta uniprot_trembl.fasta.gz uniprot_sprot.fasta.gz
#tested to here
mkdir -p MOSCA/Databases/COG
wget ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.tar.gz -P MOSCA/Databases/COG
cd MOSCA/Databases/COG
......@@ -67,7 +62,7 @@ wget ftp://ftp.ncbi.nlm.nih.gov/pub/COG/COG/fun.txt -P MOSCA/Databases/COG
wget ftp://ftp.ncbi.nlm.nih.gov/pub/COG/COG/whog -P MOSCA/Databases/COG
wget https://github.com/aleimba/bac-genomics-scripts/raw/master/cdd2cog/cdd2cog.pl -P MOSCA/scripts
#sed -i '302s#.*# my $pssm_id = $1 if $line[1] =~ /^gnl\\|CDD\\|(\\d+)/; \# get PSSM-Id from the subject hit#' MOSCA/cdd2cog.pl # Sometimes this is needed... will save when use of uninitialized value floods the screen - when they change from CDD:number to gnl|CDD|number
# Metaproteomics
# Proteomics
apt-get install -y libpwiz-tools
wget http://genesis.ugent.be/maven2/eu/isas/searchgui/SearchGUI/3.3.16/SearchGUI-3.3.16-mac_and_linux.tar.gz
tar -xzvf SearchGUI-3.3.16-mac_and_linux.tar.gz
......@@ -77,7 +72,4 @@ conda install -y -c bioconda maxquant
# Krona plotting
git clone https://github.com/marbl/Krona.git
cd Krona/KronaTools/
perl install.pl
# Troubleshoot
# svn command not found
\ No newline at end of file
perl install.pl
\ No newline at end of file
......@@ -445,6 +445,16 @@ class Binner:
int).shift(axis=1)
del result['Unnamed: 0']
return result
'''
Input:
    bins_folder: str - folder with the bins to evaluate; CheckM is told to
        pick up files with the "fasta" extension (-x fasta)
    output_directory: str - folder handed to CheckM as its output location
Output:
    returns nothing; runs "checkm data setRoot ." followed by the CheckM
    "lineage_wf" workflow through mtools.run_command, passing self.threads
    to both -t and --pplacer_threads
'''
def run_check_m(self, bins_folder, output_directory):
    # Set CheckM's data root to the current working directory
    mtools.run_command('checkm data setRoot .')
    # The adjacent literals are merged into a single template *before*
    # .format() is applied, so every placeholder is substituted. The trailing
    # space after "-t {0}" keeps it separate from "--pplacer_threads".
    mtools.run_command(
        'checkm lineage_wf -x fasta -r --ali --nt -t {0} '
        '--pplacer_threads {0} {1} {2}'.format(
            self.threads, bins_folder, output_directory))
if __name__ == '__main__':
......
......@@ -261,7 +261,7 @@ class MetaproteomicsAnalyser:
the "file" file will be updated with the new parameters
'''
def edit_maxquant_mqpar(self, mqpar, fasta_database, spectra_folder,
experiment_names, threads = 1):
experiment_names, threads = 1, file_type = 'raw'):
print('Updating parameters file information.')
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse(mqpar, parser)
......@@ -280,7 +280,7 @@ class MetaproteomicsAnalyser:
fractions = root.find("fractions")
ptms = root.find("ptms")
paramGroupIndices = root.find("paramGroupIndices")
files = mtools.sort_alphanumeric(glob.glob(spectra_folder + '/*.wiff'))
files = mtools.sort_alphanumeric(glob.glob('{}/*.{}'.format(spectra_folder, file_type)))
for i in range(len(files)):
print('Adding file: ' + files[i])
etree.SubElement(filePaths, 'string').text = files[i]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment