Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion tools/rasusa/.lint_skip

This file was deleted.

181 changes: 103 additions & 78 deletions tools/rasusa/rasusa.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
<tool id="rasusa" name="rasusa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<tool id="rasusa" name="rasusa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description>Randomly subsample reads to a specified coverage</description>
<macros>
<token name="@TOOL_VERSION@">3.0.0</token>
<token name="@TOOL_VERSION@">4.0.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">25.0</token>
<token name="@FORMATS@">fastqsanger,fastqsanger.gz,fasta,fasta.gz</token>
<xml name="size_units">
<option value="b">bases</option>
Expand Down Expand Up @@ -52,7 +53,7 @@
--frac $subsample.frac
#end if
#if $r1_ext.endswith(".gz") or $r2_ext.endswith(".gz")
--output-type g
--compress-type g
#end if ]]>
</token>
</macros>
Expand All @@ -65,51 +66,57 @@
</requirements>

<command detect_errors="exit_code"><![CDATA[
## Builds one rasusa invocation. The sub-command ("aln" or "reads") and the
## trailing arguments depend on the selected input type.
#if str($input.input_selector) == "aligned":
    ## Compare the extension value itself: inside a Cheetah directive a quoted
    ## '$name' is a plain string literal and would never equal "bam".
    #if str($input.aligned_input.ext) == "bam":
        ln -s '$input.aligned_input.metadata.bam_index' 'input.bam.bai' &&
    #end if
    ## Stage the alignment under a fixed name that keeps its datatype extension.
    ln -s '$input.aligned_input' 'input.${input.aligned_input.ext}' &&
    rasusa aln
        --coverage $input.coverage
        --step-size $input.step_size
#else:
    rasusa reads
#end if

## Test against the empty string so that a seed of 0 is still passed on.
#if str($seed) != "":
    -s $seed
#end if

#if str($input.input_selector) == "paired":
    #set r1_ext = $input.reads1.extension
    #set r2_ext = $input.reads2.extension
    ## Write to names carrying the input extension, then move onto the Galaxy outputs.
    -o 'paired_out1.$r1_ext'
    -o 'paired_out2.$r2_ext'
    @FASTQ_SUBSAMPLE_OPTIONS@
    '${input.reads1}'
    '${input.reads2}' &&
    mv 'paired_out1.$r1_ext' '$paired_output1' &&
    mv 'paired_out2.$r2_ext' '$paired_output2'

#elif str($input.input_selector) == "paired_collection":
    #set r1_ext = $input.collection.forward.extension
    #set r2_ext = $input.collection.reverse.extension
    -o 'paired_out1.$r1_ext'
    -o 'paired_out2.$r2_ext'
    @FASTQ_SUBSAMPLE_OPTIONS@
    '${input.collection.forward}'
    '${input.collection.reverse}' &&
    mv 'paired_out1.$r1_ext' '${collection_output.forward}' &&
    mv 'paired_out2.$r2_ext' '${collection_output.reverse}'

#elif str($input.input_selector) == "single":
    #set r1_ext = $input.reads.extension
    -o 'single_out.$r1_ext'
    @FASTQ_SUBSAMPLE_OPTIONS@
    '${input.reads}' &&
    mv 'single_out.$r1_ext' '$single_output'

#elif str($input.input_selector) == "aligned":
    ## rasusa writes to stdout here and samtools sorts the stream into the
    ## selected output. The -T prefix is double-quoted so that the shell
    ## expands TMPDIR (single quotes would pass the text through unexpanded).
    #if str($input.aligned_output_format) == "bam":
        --output-format bam 'input.${input.aligned_input.ext}' | samtools sort --no-PG -@ 1 -T "\${TMPDIR:-.}" -O bam -o '$bam_output' -
    #elif str($input.aligned_output_format) == "sam":
        --output-format sam 'input.${input.aligned_input.ext}' | samtools sort --no-PG -@ 1 -T "\${TMPDIR:-.}" -O sam -o '$sam_output' -
    #end if
#end if
]]></command>
<inputs>
Expand All @@ -118,7 +125,7 @@ mv 'single_out.$r1_ext' '$single_output'
<option value="paired">Paired-end FASTQ</option>
<option value="single">Single-end FASTQ</option>
<option value="paired_collection">Paired FASTQ Collection</option>
<option value="aligned">BAM file of aligned reads</option>
<option value="aligned">BAM/SAM file of aligned reads</option>
Comment thread
RZ9082 marked this conversation as resolved.
Outdated
</param>
<when value="paired">
<param name="reads1" type="data" format="@FORMATS@" label="Select first set of reads" help="Specify dataset with forward reads"/>
Expand All @@ -134,10 +141,14 @@ mv 'single_out.$r1_ext' '$single_output'
<expand macro="params_fastq" />
</when>
<when value="aligned">
    <!-- Inputs shown when an alignment file is subsampled: the dataset itself,
         the coverage and step-size options, and the format of the result. -->
    <param name="aligned_input" format="sam,bam" type="data" label="Select BAM/SAM file with alignments"/>
    <param argument="--coverage" type="integer" min="0" optional="true" value="" label="The desired depth of coverage to subsample the alignment to"/>
    <param type="integer" argument="--step-size" value="100" label="When a region has less than the desired coverage, the step size to move along the chromosome to find more reads."
           help="The lowest of the step and the minimum end coordinate of the reads in the region will be used. This parameter can have a significant impact on the runtime of the subsampling process."/>
    <!-- Decides which of the bam_output / sam_output datasets is produced. -->
    <param type="select" name="aligned_output_format" label="Select desired output format">
        <option value="bam" selected="true">BAM</option>
        <option value="sam">SAM</option>
    </param>
</when>
</conditional>
<param type="integer" argument="--seed" optional="true" label="Random seed to use"/>
Expand All @@ -158,7 +169,10 @@ mv 'single_out.$r1_ext' '$single_output'
<data name="reverse" label="${tool.name} on ${input.collection.reverse.name}: paired-end R2" format_source="collection['reverse']"/>
</collection>
<data name="bam_output" label="${tool.name} on ${on_string}: BAM" format="bam">
    <!-- Only produced for aligned input when BAM is the chosen output format.
         One filter is enough: it already includes the input-type check. -->
    <filter>input['input_selector'] == 'aligned' and input['aligned_output_format'] == 'bam'</filter>
</data>
<data format="sam"
      name="sam_output"
      label="${tool.name} on ${on_string}: SAM">
    <!-- Only produced for aligned input when SAM is the chosen output format. -->
    <filter>input['input_selector'] == 'aligned' and input['aligned_output_format'] == 'sam'</filter>
</data>
</outputs>
<tests>
Expand All @@ -167,12 +181,12 @@ mv 'single_out.$r1_ext' '$single_output'
<conditional name="input">
<param name="input_selector" value="single"/>
<param name="reads" value="r1.fastq.gz"/>
</conditional>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="b"/>
<param name="genome_size" value="1000"/>
<param name="coverage" value="1"/>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="b"/>
<param name="genome_size" value="1000"/>
<param name="coverage" value="1"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output name="single_output" value="single_by_coverage_b.fastq.gz" ftype="fastqsanger.gz"/>
Expand All @@ -183,12 +197,12 @@ mv 'single_out.$r1_ext' '$single_output'
<param name="input_selector" value="paired"/>
<param name="reads1" value="r1.fastq.gz"/>
<param name="reads2" value="r2.fastq.gz"/>
</conditional>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="k"/>
<param name="genome_size" value="1"/>
<param name="coverage" value="1"/>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="k"/>
<param name="genome_size" value="1"/>
<param name="coverage" value="1"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output name="paired_output1" value="paired1_by_coverage_k.fastq.gz" ftype="fastqsanger.gz"/>
Expand All @@ -204,12 +218,12 @@ mv 'single_out.$r1_ext' '$single_output'
<element name="reverse" value="r2.fastq.gz"/>
</collection>
</param>
</conditional>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="m"/>
<param name="genome_size" value="0.001"/>
<param name="coverage" value="1"/>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="m"/>
<param name="genome_size" value="0.001"/>
<param name="coverage" value="1"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output_collection name="collection_output" type="paired">
Expand All @@ -222,12 +236,12 @@ mv 'single_out.$r1_ext' '$single_output'
<conditional name="input">
<param name="input_selector" value="single"/>
<param name="reads" value="r1.fasta.gz"/>
</conditional>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="g"/>
<param name="genome_size" value="0.001"/>
<param name="coverage" value="0.001"/>
<conditional name="subsample">
<param name="type" value="coverage"/>
<param name="genome_size_unit" value="g"/>
<param name="genome_size" value="0.001"/>
<param name="coverage" value="0.001"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output name="single_output" value="single_end_by_coverage_g.fasta" ftype="fasta.gz"/>
Expand All @@ -238,11 +252,11 @@ mv 'single_out.$r1_ext' '$single_output'
<param name="input_selector" value="paired"/>
<param name="reads1" value="r1.fastq"/>
<param name="reads2" value="r2.fastq"/>
</conditional>
<conditional name="subsample">
<param name="type" value="num_bases"/>
<param name="num_bases_unit" value="k"/>
<param name="bases" value="2"/>
<conditional name="subsample">
<param name="type" value="num_bases"/>
<param name="num_bases_unit" value="k"/>
<param name="bases" value="2"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output name="paired_output1" value="paired1_by_num_bases_k.fastq" ftype="fastqsanger"/>
Expand All @@ -254,10 +268,10 @@ mv 'single_out.$r1_ext' '$single_output'
<param name="input_selector" value="paired"/>
<param name="reads1" value="r1.fasta.gz"/>
<param name="reads2" value="r2.fasta.gz"/>
</conditional>
<conditional name="subsample">
<param name="type" value="num_reads"/>
<param name="num" value="5"/>
<conditional name="subsample">
<param name="type" value="num_reads"/>
<param name="num" value="5"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output name="paired_output1" value="paired1_by_num_reads.fasta.gz" ftype="fasta.gz"/>
Expand All @@ -273,10 +287,10 @@ mv 'single_out.$r1_ext' '$single_output'
<element name="reverse" value="r2.fasta"/>
</collection>
</param>
</conditional>
<conditional name="subsample">
<param name="type" value="frac_reads"/>
<param name="frac" value="0.6"/>
<conditional name="subsample">
<param name="type" value="frac_reads"/>
<param name="frac" value="0.6"/>
</conditional>
</conditional>
<param name="seed" value="1"/>
<output_collection name="collection_output" type="paired">
Expand All @@ -285,15 +299,26 @@ mv 'single_out.$r1_ext' '$single_output'
</output_collection>
</test>
<test expect_num_outputs="1">
    <!-- test 8: bam input, bam output (the default output format) -->
    <conditional name="input">
        <param name="input_selector" value="aligned"/>
        <param name="aligned_input" value="input.bam"/>
        <!-- coverage belongs to the "aligned" branch, so it is set inside the conditional -->
        <param name="coverage" value="1"/>
    </conditional>
    <param name="seed" value="1"/>
    <output name="bam_output" value="output.bam" ftype="bam"/>
</test>
<test expect_num_outputs="1">
    <!-- test 9: bam input, sam output -->
    <conditional name="input">
        <param name="input_selector" value="aligned"/>
        <param name="aligned_input" value="input.bam"/>
        <param name="coverage" value="1"/>
        <param name="aligned_output_format" value="sam"/>
    </conditional>
    <param name="seed" value="1"/>
    <!-- With SAM selected, bam_output is filtered out; the produced dataset is sam_output. -->
    <output name="sam_output" value="output.sam" ftype="sam"/>
</test>
</tests>
<help><![CDATA[

Expand All @@ -306,4 +331,4 @@ specifying the size of the subset:
<citations>
    <!-- DOI reference listed for citing this tool. -->
    <citation type="doi">10.21105/joss.03941</citation>
</citations>
</tool>
</tool>
Binary file modified tools/rasusa/test-data/output.bam
Binary file not shown.
Loading
Loading