Commit 5cedc65c authored by iquasere's avatar iquasere
Browse files

mrna now aligns to ORFs

Data is now grouped by Entry again before generating the expression matrix, because adding COGs expands the data into multiple rows per Entry, which produces duplicate row names.
parent 861f7dde
......@@ -291,7 +291,7 @@ rule entry_count:
threads:
1
run:
uniprotinfo=pd.read_csv(input.uniprotinfo[0], sep='\t')
uniprotinfo = pd.read_csv(input.uniprotinfo[0], sep='\t')
result = pd.DataFrame(columns=['sseqid'])
i = 1
for blast in input.blasts:
......
......@@ -31,7 +31,7 @@ class Joiner:
def run(self):
args = self.get_arguments()
rscript_folder = '{}/../../../bin/'.format(sys.path[0])
rscript_folder = '{}/../../../bin/'.format(sys.path[0]) # TODO - this should have a better fix than this hack
experiments = (pd.read_csv(args.experiments, sep='\t') if args.input_format == 'tsv' else
pd.read_excel(args.experiments))
......@@ -128,9 +128,8 @@ class Joiner:
# For each sample, write an Entry Report
multi_sheet_excel('{}/MOSCA_Entry_Report.xlsx'.format(args.output), data, sheet_name=sample)
data = pd.read_excel('{}/MOSCA_Entry_Report.xlsx'.format(args.output), sheet_name = sample + ' (1)')
data[['Entry'] + expression_analysed].to_csv('{}/Metatranscriptomics/expression_matrix.tsv'.format(
args.output), sep='\t', index=False)
# Adding COGs expands the data so one Entry can span several rows; sum the
# expression columns per Entry so the expression matrix has unique row names.
data[['Entry'] + expression_analysed].groupby('Entry')[expression_analysed].sum().reset_index().to_csv(
    '{}/Metatranscriptomics/expression_matrix.tsv'.format(args.output), sep='\t', index=False)
for mg_name in sample2mgname[sample]:
# Draw the taxonomy krona plot
......
......@@ -67,11 +67,11 @@ class QuantificationAnalyser:
if experiments.iloc[i]['Data type'] == 'mrna':
attribute = 'Name'
folder = 'Metatranscriptomics'
reference = '{}/Assembly/{}/contigs.fasta'.format(args.output, experiments.iloc[i]['Sample'])
reference = '{}/Annotation/{}/fgs.ffn'.format(args.output, experiments.iloc[i]['Sample'])
elif experiments.iloc[i]['Data type'] == 'dna':
attribute = 'gene_id'
folder = 'Annotation'
reference = '{}/Annotation/{}/fgs.faa'.format(args.output, experiments.iloc[i]['Sample'])
reference = '{}/Assembly/{}/contigs.fasta'.format(args.output, experiments.iloc[i]['Sample'])
else:
print('A data type MOSCA can yet not handle!')
continue
......@@ -81,8 +81,7 @@ class QuantificationAnalyser:
if not os.path.isfile("{}/{}/{}.readcounts".format(args.output, folder, experiments.iloc[i]['Name'])):
print("{}/{}/{}.readcounts not found! Generating it".format(args.output, folder,
experiments.iloc[i]['Name']))
perform_alignment(
'{}/Assembly/{}/contigs.fasta'.format(args.output, experiments.iloc[i]['Sample']),
perform_alignment(reference,
['{}/Preprocess/Trimmomatic/quality_trimmed_{}_{}_paired.fq'.format(
args.output, experiments.iloc[i]['Name'], fr) for fr in ['forward', 'reverse']],
'{}/{}/{}'.format(args.output, folder, experiments.iloc[i]['Name']),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment