diff --git a/tools/collapse_collection/.shed.yml b/tools/collapse_collection/.shed.yml new file mode 100644 index 00000000000..1d7ff8e05c1 --- /dev/null +++ b/tools/collapse_collection/.shed.yml @@ -0,0 +1,10 @@ +categories: [Text Manipulation] +description: Collapse a list collection into a single dataset, with options to keep a common header and prepend dataset names. +long_description: | + Concatenates every file in a list collection into a single output dataset, + preserving collection order. Supports header deduplication and dataset name + prepending in several placement modes. +name: collapse_collections +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/collapse_collection +homepage_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/collapse_collection diff --git a/tools/collapse_collection/collapse_collection.xml b/tools/collapse_collection/collapse_collection.xml new file mode 100644 index 00000000000..3727198eb93 --- /dev/null +++ b/tools/collapse_collection/collapse_collection.xml @@ -0,0 +1,206 @@ + + into single dataset in order of the collection + + macros.xml + + + gawk + + + $output + + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_ at the National Microbiology Laboratory (PHAC). + + ]]> + + @misc{phac_nml_galaxy_tools, + title={Galaxy Tools}, + author={{Public Health Agency of Canada, National Microbiology Laboratory}}, + url={https://github.com/phac-nml/galaxy_tools}, + note={Original source repository} + } + + diff --git a/tools/collapse_collection/macros.xml b/tools/collapse_collection/macros.xml new file mode 100644 index 00000000000..0bae76d9d21 --- /dev/null +++ b/tools/collapse_collection/macros.xml @@ -0,0 +1,7 @@ + + + diff --git a/tools/collapse_collection/static/images/collapse.png b/tools/collapse_collection/static/images/collapse.png new file mode 100644 index 00000000000..ce4dc8d2c5e Binary files /dev/null and b/tools/collapse_collection/static/images/collapse.png differ diff --git a/tools/collapse_collection/static/images/collapse.svg b/tools/collapse_collection/static/images/collapse.svg new file mode 100644 index 00000000000..bf455176628 --- /dev/null +++ b/tools/collapse_collection/static/images/collapse.svg @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + Input: A collection + + strain1.tsv + seq_namemedianmean... + mcr_15252.74 + mcr_201.61 + + strain2.tsv + seq_namemedianmean... + mcr_18585.62 + mcr_203.05 + + Collapse + header + names + + Output: A single dataset + Sampleseq_namemedianmean + + strain1.tsvmcr_15252.74 + strain1.tsvmcr_201.61 + + strain2.tsvmcr_18585.62 + strain2.tsvmcr_203.05 + single header kept, dataset name prepended as a column on every row + diff --git a/tools/collapse_collection/test-data/answer.txt b/tools/collapse_collection/test-data/answer.txt new file mode 100644 index 00000000000..6d9cbb40d45 --- /dev/null +++ b/tools/collapse_collection/test-data/answer.txt @@ -0,0 +1,8 @@ +first file +second +third +fourth line +second file +second +third +fourth line diff --git a/tools/collapse_collection/test-data/answer2.tsv b/tools/collapse_collection/test-data/answer2.tsv new file mode 100644 index 00000000000..7ccd9cd7e54 --- /dev/null +++ b/tools/collapse_collection/test-data/answer2.tsv @@ -0,0 +1,5 @@ +Sample seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected +strain1.tsv mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +strain1.tsv mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980 +strain2.tsv mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +strain2.tsv mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833 diff --git a/tools/collapse_collection/test-data/answer3.tsv b/tools/collapse_collection/test-data/answer3.tsv new file mode 100644 index 00000000000..7d4cff3db3d --- /dev/null +++ b/tools/collapse_collection/test-data/answer3.tsv @@ -0,0 +1,5 @@ +seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected +mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980 +mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833 diff --git a/tools/collapse_collection/test-data/input1 b/tools/collapse_collection/test-data/input1 new file mode 100644 index 00000000000..44f88264a3a --- /dev/null +++ b/tools/collapse_collection/test-data/input1 @@ -0,0 +1,4 @@ +first file +second +third +fourth line diff --git a/tools/collapse_collection/test-data/input2 b/tools/collapse_collection/test-data/input2 new file mode 100644 index 00000000000..d601dc95e7f --- /dev/null +++ b/tools/collapse_collection/test-data/input2 @@ -0,0 +1,4 @@ +second file +second +third +fourth line diff --git a/tools/collapse_collection/test-data/strain1.tsv b/tools/collapse_collection/test-data/strain1.tsv new file mode 100644 index 00000000000..a7c44fd4c9f --- /dev/null +++ b/tools/collapse_collection/test-data/strain1.tsv @@ -0,0 +1,3 @@ +seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected +mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980 diff --git a/tools/collapse_collection/test-data/strain2.tsv b/tools/collapse_collection/test-data/strain2.tsv new file mode 100644 index 00000000000..cd15019dad6 --- /dev/null +++ b/tools/collapse_collection/test-data/strain2.tsv @@ -0,0 +1,3 @@ +seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected +mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000 +mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833