diff --git a/tools/taffy/.shed.yml b/tools/taffy/.shed.yml new file mode 100644 index 00000000000..6baa81ce8d6 --- /dev/null +++ b/tools/taffy/.shed.yml @@ -0,0 +1,23 @@ +name: taffy +owner: iuc +description: A set of tools for manipulating TAF files +long_description: | + The Transposed Alignment Format (TAF) structures multiple sequence alignments as a + sequence of columns, assigning each its own line. With optional run-length encoding + for bases, TAF minimizes file size and solves the data fragmentation issues common + in block-based formats like MAF. +homepage_url: https://github.com/ComparativeGenomicsToolkit/taffy +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/taffy +categories: +- Sequence Analysis +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for Taffy suite: {{ tool_name }}" +suite: + name: "suite_taffy" + description: "A set of tools for manipulating TAF files" + long_description: | + The Transposed Alignment Format (TAF) structures multiple sequence alignments as a + sequence of columns, assigning each its own line. With optional run-length encoding + for bases, TAF minimizes file size and solves the data fragmentation issues common + in block-based formats like MAF. 
diff --git a/tools/taffy/macros.xml b/tools/taffy/macros.xml new file mode 100644 index 00000000000..c64cef29258 --- /dev/null +++ b/tools/taffy/macros.xml @@ -0,0 +1,140 @@ + + + + taffy + + + 0.0.3 + 0 + 26.0 + + + '$uncompressed_input' && + #else if $input_file.is_of_type('maf.bz2') + bzip2 -dc '$input_file' > '$uncompressed_input' && + #else + ln -s '$input_file' '$uncompressed_input' && + #end if + ]]> + + '$out_file' + #else: + --outputFile '$out_file' + #end if + ]]> + + '$out_file' + #else if $compression_maf == 'bz2': + --outputFile temp_file && + bzip2 -c temp_file > '$out_file' + #else: + --outputFile '$out_file' + #end if + ]]> + + '$out_file' + #else: + --outputFile '$out_file' + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^[A-Za-z0-9._|:-]+( [A-Za-z0-9._|:-]+)*$ + + + ^\S(?:.*\S)?$ + + + + + + + + + + + + @misc{githubtaffy, + author = {Glenn Hickey, Benedict Paten}, + year = {}, + title = {taffy}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/ComparativeGenomicsToolkit/taffy}, + } + 10.1038/s41586-020-2871-y + + + \ No newline at end of file diff --git a/tools/taffy/taffy_add_gap_bases.xml b/tools/taffy/taffy_add_gap_bases.xml new file mode 100644 index 00000000000..2bccce24a05 --- /dev/null +++ b/tools/taffy/taffy_add_gap_bases.xml @@ -0,0 +1,162 @@ + + fills interstitial gaps in a TAF file + + macros.xml + + + + + '$temp_input' && + #else if $fasta_file.is_of_type('fasta.bz2') + bzip2 -dc '$fasta_file' > '$temp_input' && + #else + ln -s '$fasta_file' '$temp_input' && + #end if + #end for + #end if + + ## Run main command + taffy add-gap-bases + #if $gapFill.source == 'hal': + --halFile '$gapFill.input_hal' + #elif $gapFill.source == 'fasta': + #for $i, $fasta_file in enumerate($gapFill.input_fasta): + 'input_${i}.fasta' + #end for + #end if + --logLevel 'INFO' + --inputFile '$uncompressed_input' + 
--maximumGapStringLength $maximumGapStringLength + --repeatCoordinatesEveryNColumns $repeatCoordinatesEveryNColumns + + ## Compress TAF output if requested by user + @COMPRESS_TAF@ + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Genome.chr1``), without any additional description. + +- **HAL files:** + The HAL file must contain all genomes (species) present in the TAF file. + While it does not strictly need to contain every single sequence, missing sequences will result in gaps not being filled. + +When a gap is filled, the tool encodes this information with a 'G' (gap with sequence) tag by replacing the 'g' (gap) tag in the header line of the alignment block in the form: ``BASES ; G `` + +The maximum length of inserted gap strings can be limited to prevent filling excessively large regions. +Use this tool to produce alignments with explicit gap sequences, making them suitable for downstream analyses that require filled interstitial regions. 
+ ]]> + + + \ No newline at end of file diff --git a/tools/taffy/taffy_annotate.xml b/tools/taffy/taffy_annotate.xml new file mode 100644 index 00000000000..9761a7d95a8 --- /dev/null +++ b/tools/taffy/taffy_annotate.xml @@ -0,0 +1,99 @@ + + adds annotations from a Wiggle file to a TAF file + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/taffy/taffy_coverage.xml b/tools/taffy/taffy_coverage.xml new file mode 100644 index 00000000000..72d60a3cc6c --- /dev/null +++ b/tools/taffy/taffy_coverage.xml @@ -0,0 +1,209 @@ + + computes basic pairwise coverage stats for a TAF file + + macros.xml + + + + + '$out_file' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^\d+(\s+\d+)*$ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/taffy/taffy_norm.xml b/tools/taffy/taffy_norm.xml new file mode 100644 index 00000000000..7f543f29aed --- /dev/null +++ b/tools/taffy/taffy_norm.xml @@ -0,0 +1,287 @@ + + normalizes blocks in a TAF file + + macros.xml + + + + + '$temp_input' && + #else if $fasta_file.is_of_type('fasta.bz2') + bzip2 -dc '$fasta_file' > '$temp_input' && + #else + ln -s '$fasta_file' '$temp_input' && + #end if + #end for + #end if + + ## Run main command + taffy norm + --logLevel 'INFO' + --inputFile '$uncompressed_input' + --maximumBlockLengthToMerge $maximumBlockLengthToMerge + --minimumSharedRows $shared.minimumSharedRows + --fractionSharedRows $shared.fractionSharedRows + --maximumGapLength $gaps.maximumGapLength + $gaps.filterGapCausingDupes + #if $gaps.gapFill.source == 'hal': + --halFile '$gaps.gapFill.input_hal' + #elif $gaps.gapFill.source == 'fasta': + 
--seqFiles + #for $i, $fasta_file in enumerate($gaps.gapFill.input_fasta): + 'input_${i}.fasta' + #end for + #end if + --repeatCoordinatesEveryNColumns $repeatCoordinatesEveryNColumns + #if $output.format == 'maf': + --maf + + ## Compress MAF output if requested by user + @COMPRESS_MAF@ + #else: + ## Compress TAF output if requested by user + @COMPRESS_TAF@ + #end if + ]]> + + + +
+ + +
+
+ + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Genome.chr1``), without any additional description. + +- **HAL files:** + The HAL file must contain all genomes (species) present in the TAF file. + While it does not strictly need to contain every single sequence, missing sequences will result in gaps not being filled. + +During normalization, rows are automatically sorted alphanumerically by sequence name. +If a different sorting order is required, the output should be further processed with 'Taffy sort'. +The output is written to TAF format by default, with an option to write MAF format. + ]]> + + +
\ No newline at end of file diff --git a/tools/taffy/taffy_sort.xml b/tools/taffy/taffy_sort.xml new file mode 100644 index 00000000000..942b8cbd636 --- /dev/null +++ b/tools/taffy/taffy_sort.xml @@ -0,0 +1,243 @@ + + orders rows of TAF file + + macros.xml + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/tools/taffy/taffy_stats.xml b/tools/taffy/taffy_stats.xml new file mode 100644 index 00000000000..b22517664a5 --- /dev/null +++ b/tools/taffy/taffy_stats.xml @@ -0,0 +1,162 @@ + + prints statistics from a TAF or MAF file + + macros.xml + + + + + '$out_file' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ``. + +- **Print BED intervals of each reference sequence:** + This option generates a BED file that lists the genomic intervals covered by each reference sequence in the TAF file. + This option is not compatible with MAF files. + +- **Print stats about block number, aligned bases, etc:** + This option reports a summary of alignment statistics, including: Total number of alignment blocks, columns, aligned bases, and gaps. + As well as average number of columns per blocks, column depth, bases per column, and gaps per column. + The output is a two-column tabular file. + ]]> + + + \ No newline at end of file diff --git a/tools/taffy/taffy_view.xml b/tools/taffy/taffy_view.xml new file mode 100644 index 00000000000..dc5ed06ad1d --- /dev/null +++ b/tools/taffy/taffy_view.xml @@ -0,0 +1,422 @@ + + converts between TAF and MAF formats + + macros.xml + + + + + + + + + + + + + + + + + ^.+(:(\d+)(-\d*)?)?$ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+ +
+ +
+ +
+ + + + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
\ No newline at end of file diff --git a/tools/taffy/test-data/mr.hal b/tools/taffy/test-data/mr.hal new file mode 100644 index 00000000000..8c06ed46dd5 Binary files /dev/null and b/tools/taffy/test-data/mr.hal differ diff --git a/tools/taffy/test-data/mr.mrrefChr1.fasta.gz b/tools/taffy/test-data/mr.mrrefChr1.fasta.gz new file mode 100644 index 00000000000..4758c13a5b5 Binary files /dev/null and b/tools/taffy/test-data/mr.mrrefChr1.fasta.gz differ diff --git a/tools/taffy/test-data/mr.taf b/tools/taffy/test-data/mr.taf new file mode 100644 index 00000000000..526f16244fe --- /dev/null +++ b/tools/taffy/test-data/mr.taf @@ -0,0 +1,106 @@ +#taf version:1 scoring:N/A +GGG ; i 0 mr.mrrefChr1 178277 + 182340 i 1 simMouse_chr6.simMouse.chr6 630640 + 636262 i 2 simRat_chr6.simRat.chr6 642153 + 647215 +TTT +CCC +AAA +AAA +GGG +CCC +TAT +CTC +TTT +GGG +TTT +AAA +CCA +AAA +TTT +AAA +CCG +TTT +AAA +GGG +AAA +TTT +TTT +GGG +GGG +AAA +CCC +AAA +TTT +TTT +CCC +AAA +TTT +GGG +GGG +AAA +TTT +GGG +AAA +ACA +AAA +CAC +TTT +GGG +TTT +GGG +AAC +CCC +TTT +AAA ; g 0 1 g 1 1 g 2 1 +AAA ; +CCC +AAA +AAA +AAA +AAA +AAA +AAA ; g 0 2 g 1 2 g 2 2 +AAA +AAA +TTT +CCC +TTT +TAT +CCC +TTC +TTT +CCC +CCC +TTT +AAA +TTT +TTT +AAA +TTT +CCC +--- +--- +--- +--- +--- +--- +--- +--- +--- +--- +--- ; +--- +--- +--- +AAA +GAG +TTT +TTT +TTT +GGG +AAA +TTT +TT- +AA- +TT- +--- +TT- +CC- diff --git a/tools/taffy/test-data/taffy_annotate_input.wig b/tools/taffy/test-data/taffy_annotate_input.wig new file mode 100644 index 00000000000..b1ec2d33768 --- /dev/null +++ b/tools/taffy/test-data/taffy_annotate_input.wig @@ -0,0 +1,7 @@ +variableStep chrom=Anc0refChr0 +1 0.95 +10 0.80 +25 0.15 + +variableStep chrom=Anc0refChr0 +30 0.5 diff --git a/tools/taffy/test-data/taffy_sort_dup_input.txt b/tools/taffy/test-data/taffy_sort_dup_input.txt new file mode 100644 index 00000000000..931290a0f26 --- /dev/null +++ b/tools/taffy/test-data/taffy_sort_dup_input.txt @@ -0,0 +1 @@ +sim \ No newline at end of file diff --git 
a/tools/taffy/test-data/taffy_sort_pad_input.txt b/tools/taffy/test-data/taffy_sort_pad_input.txt new file mode 100644 index 00000000000..38383ba0eaf --- /dev/null +++ b/tools/taffy/test-data/taffy_sort_pad_input.txt @@ -0,0 +1 @@ +missing_species \ No newline at end of file diff --git a/tools/taffy/test-data/taffy_sort_rem_input.txt b/tools/taffy/test-data/taffy_sort_rem_input.txt new file mode 100644 index 00000000000..ba56deff61c --- /dev/null +++ b/tools/taffy/test-data/taffy_sort_rem_input.txt @@ -0,0 +1,2 @@ +simCow +simDog \ No newline at end of file diff --git a/tools/taffy/test-data/taffy_sort_sor_input.txt b/tools/taffy/test-data/taffy_sort_sor_input.txt new file mode 100644 index 00000000000..9af81270caf --- /dev/null +++ b/tools/taffy/test-data/taffy_sort_sor_input.txt @@ -0,0 +1,9 @@ +simRat +simMouse +simHuman +simDog +simCow +mr +Anc2 +Anc1 +Anc0 \ No newline at end of file diff --git a/tools/taffy/test-data/taffy_view_mapping_input.tsv b/tools/taffy/test-data/taffy_view_mapping_input.tsv new file mode 100644 index 00000000000..c3f9ddea848 --- /dev/null +++ b/tools/taffy/test-data/taffy_view_mapping_input.tsv @@ -0,0 +1,2 @@ +Anc0.Anc0refChr0 NewAnc0refChr0 +simDog_chr6.simDog.chr6 NewSimDog \ No newline at end of file diff --git a/tools/taffy/test-data/taffy_view_phylogeny_input.txt b/tools/taffy/test-data/taffy_view_phylogeny_input.txt new file mode 100644 index 00000000000..e9048d97ec3 --- /dev/null +++ b/tools/taffy/test-data/taffy_view_phylogeny_input.txt @@ -0,0 +1 @@ +((simHuman_chr6,simCow_chr6,simDog_chr6)Anc1,(simMouse_chr6,simRat_chr6)Anc2,mr)Anc0; \ No newline at end of file diff --git a/tools/taffy/test-data/testMaf.maf b/tools/taffy/test-data/testMaf.maf new file mode 100644 index 00000000000..906f8469869 --- /dev/null +++ b/tools/taffy/test-data/testMaf.maf @@ -0,0 +1,65 @@ +##maf version=1 scoring=N/A + +a +s Anc0.Anc0refChr0 0 50 + 4151 GTCAAGCTCAGTAGATACTGGATTAGGAATTCATGAGTTAAGCTGTAGCC +s Anc1.Anc1refChr1 292714 50 + 296994 
GTCAAGCTCAGTAGATACTGGATTAGGAATTCATGAGTTAAGCTGTAGCC +s Anc2.Anc2refChr1 5 50 + 4655 GTCAAGCTCAGTTGATGCTGGATTAGGAATTCATGAGTTAAGCTGTAGTC +s mr.mrrefChr1 178277 50 + 182340 GTCAAGCTCTGTACATACTAGATTGGACATTCATGGATGAAACTGTGACT +s simCow_chr6.simCow.chr6 5045 50 - 602619 GTGAAGCTCAGTTGATGCTGGATTGGGAACTCATGAGTTAAGCTGTAAGC +s simDog_chr6.simDog.chr6 589129 50 + 593897 GTCAAGCTCAGTTGGTGCTGGATTAAGAATTCATGAGTTAGGCTGCAGTC +s simHuman_chr6.simHuman.chr6 597375 50 + 601863 GTCAAGCTCAGTAGATATTGGATTAGGAATTCATAAGTTAACCTGTAGCC +s simMouse_chr6.simMouse.chr6 630640 50 + 636262 GTCAAGCATTGTACATACTAGATTGGACATTCATGGATGACAATGTGACT +s simRat_chr6.simRat.chr6 642153 50 + 647215 GTCAAGCTCTGTAAATAGTAGATTGGACATTCATGGATGAAACTGTGCCT + +a +s Anc0.Anc0refChr0 50 1 + 4151 A +s Anc1.Anc1refChr1 292764 1 + 296994 A +s Anc2.Anc2refChr1 55 1 + 4655 A +s mr.mrrefChr1 178328 1 + 182340 A +s simCow_chr6.simCow.chr6 5095 1 - 602619 T +s simDog_chr6.simDog.chr6 589179 1 + 593897 A +s simHuman_chr6.simHuman.chr6 597425 1 + 601863 A +s simMouse_chr6.simMouse.chr6 630691 1 + 636262 A +s simRat_chr6.simRat.chr6 642204 1 + 647215 A + +a +s Anc0.Anc0refChr0 51 7 + 4151 ACATCTG +s Anc1.Anc1refChr1 292765 7 + 296994 ACATCTG +s Anc2.Anc2refChr1 56 7 + 4655 ACATCTG +s mr.mrrefChr1 178329 7 + 182340 ACAAAAA +s simCow_chr6.simCow.chr6 5097 7 - 602619 ATATCCG +s simDog_chr6.simDog.chr6 589180 7 + 593897 ACATCTG +s simHuman_chr6.simHuman.chr6 597426 7 + 601863 ACATCGG +s simMouse_chr6.simMouse.chr6 630692 7 + 636262 ACAAAAA +s simRat_chr6.simRat.chr6 642205 7 + 647215 ACAAAAA + +a +s Anc0.Anc0refChr0 58 29 + 4151 ATATATTCTCTCTAATATTTTCTAATCTT +s Anc1.Anc1refChr1 292772 29 + 296994 ATATATTCTCTCTAATATTTTCTAATCTT +s Anc2.Anc2refChr1 63 29 + 4655 ATGTATTCTCTATAATAGTTTCTAATCTT +s mr.mrrefChr1 178338 19 + 182340 AAATCTTCTTCCTATTATC---------- +s simCow_chr6.simCow.chr6 5104 29 - 602619 ATGTATTCTCTATAATAGTTTCTAATCTT +s simDog_chr6.simDog.chr6 589187 5 + 593897 ATGTA------------------------ +s simHuman_chr6.simHuman.chr6 597433 29 
+ 601863 GTATGCTCTATCTAATATTTAGTACTCTT +s simMouse_chr6.simMouse.chr6 630701 19 + 636262 AAATCTACTTCCTATTATC---------- +s simRat_chr6.simRat.chr6 642214 19 + 647215 AAATCTTCCTCCTATTATC---------- + +a +s Anc0.Anc0refChr0 87 13 + 4151 AGTCGATCTGGGA +s Anc1.Anc1refChr1 292801 13 + 296994 AGTCGATCTGGGA +s Anc2.Anc2refChr1 92 13 + 4655 AGTCGATCTGGGA +s simCow_chr6.simCow.chr6 5133 13 - 602619 AGTCGATCTGGGA +s simDog_chr6.simDog.chr6 589192 3 + 593897 ----------GGA +s simHuman_chr6.simHuman.chr6 597463 13 + 601863 AGTCGATGTGGGA + +a +s Anc0.Anc0refChr0 100 18 + 4151 GTACAGTTTGATTATGTC +s Anc1.Anc1refChr1 292814 18 + 296994 GTACAGTTTGATTATGTC +s Anc2.Anc2refChr1 105 18 + 4655 GTACAGTTTGAGTATGAC +s mr.mrrefChr1 178357 13 + 182340 ----AGTTTGATTAT-TC +s simCow_chr6.simCow.chr6 5146 18 - 602619 GTACAGTCTGAGTATGAG +s simDog_chr6.simDog.chr6 589195 18 + 593897 ATGCAGTTTGAGTATGAC +s simHuman_chr6.simHuman.chr6 597482 18 + 601863 GTAAAGTTTAATTATGTC +s simMouse_chr6.simMouse.chr6 630720 13 + 636262 ----AATTTGATTAT-TC +s simRat_chr6.simRat.chr6 642233 8 + 647215 ----AGTTTGAT------ + diff --git a/tools/taffy/test-data/testMaf.maf.bz2 b/tools/taffy/test-data/testMaf.maf.bz2 new file mode 100644 index 00000000000..8f9ce79bd75 Binary files /dev/null and b/tools/taffy/test-data/testMaf.maf.bz2 differ diff --git a/tools/taffy/test-data/testMaf.maf.gz b/tools/taffy/test-data/testMaf.maf.gz new file mode 100644 index 00000000000..11e40d54b86 Binary files /dev/null and b/tools/taffy/test-data/testMaf.maf.gz differ diff --git a/tools/taffy/test-data/testTaf.taf b/tools/taffy/test-data/testTaf.taf new file mode 100644 index 00000000000..54d4ecc86bb --- /dev/null +++ b/tools/taffy/test-data/testTaf.taf @@ -0,0 +1,119 @@ +#taf version:1 scoring:N/A +GGGGGGGGG ; i 0 Anc0.Anc0refChr0 0 + 4151 i 1 Anc1.Anc1refChr1 292714 + 296994 i 2 Anc2.Anc2refChr1 5 + 4655 i 3 mr.mrrefChr1 178277 + 182340 i 4 simCow_chr6.simCow.chr6 5045 - 602619 i 5 simDog_chr6.simDog.chr6 589129 + 593897 i 6 
simHuman_chr6.simHuman.chr6 597375 + 601863 i 7 simMouse_chr6.simMouse.chr6 630640 + 636262 i 8 simRat_chr6.simRat.chr6 642153 + 647215 +TTTTTTTTT +CCCCGCCCC +AAAAAAAAA +AAAAAAAAA +GGGGGGGGG +CCCCCCCCC +TTTTTTTAT +CCCCCCCTC +AAATAAATT +GGGGGGGGG +TTTTTTTTT +AATATTAAA +GGGCGGGCA +AAAAAGAAA +TTTTTTTTT +AAGAGGAAA +CCCCCCTCG +TTTTTTTTT +GGGAGGGAA +GGGGGGGGG +AAAAAAAAA +TTTTTTTTT +TTTTTTTTT +AAAGGAAGG +GGGGGAGGG +GGGAGGGAA +AAACAAACC +AAAAAAAAA +TTTTCTTTT +TTTTTTTTT +CCCCCCCCC +AAAAAAAAA +TTTTTTTTT +GGGGGGAGG +AAAGAAAGG +GGGAGGGAA +TTTTTTTTT +TTTGTTTGG +AAAAAAAAA +AAAAAGACA +GGGAGGCAA +CCCCCCCAC +TTTTTTTTT +GGGGGGGGG +TTTTTCTTT +AAAGAAAGG +GGGAAGGAC +CCTCGTCCC +CCCTCCCTT +AAAATAAAA ; g 3 1 g 7 1 g 8 1 +AAAAAAAAA ; g 4 1 +CCCCTCCCC +AAAAAAAAA +TTTATTTAA +CCCACCCAA +TTTACTGAA +GGGAGGGAA +AAAAAAGAA ; g 3 2 g 7 2 g 8 2 +TTTATTTAA +AAGAGGAAA +TTTTTTTTT +AAACAAGCC +TTTTT-CTT +TTTTT-TAT +CCCCC-CCC +TTTTT-TTC +CCCTC-ATT +TTTCT-TCC +CCACA-CCC +TTTTT-TTT +AAAAA-AAA +AAATA-ATT +TTTTT-TTT +AAAAA-AAA +TTGTG-TTT +TTTCT-TCC +TTT-T-T-- +TTT-T-A-- +CCC-C-G-- +TTT-T-T-- +AAA-A-A-- +AAA-A-C-- +TTT-T-T-- +CCC-C-C-- +TTT-T-T-- +TTT-T-T-- +AAAA-A ; d 3 d 6 d 6 g 5 1 +GGGG-G +TTTT-T +CCCC-C +GGGG-G +AAAA-A +TTTT-T +CCCC-G +TTTT-T +GGGG-G +GGGGGG +GGGGGG +AAAAAA +GGG-GAG-- ; i 3 mr.mrrefChr1 178357 + 182340 g 6 6 i 7 simMouse_chr6.simMouse.chr6 630720 + 636262 i 8 simRat_chr6.simRat.chr6 642233 + 647215 +TTT-TTT-- +AAA-AGA-- +CCC-CCA-- +AAAAAAAAA +GGGGGGGAG +TTTTTTTTT +TTTTCTTTT +TTTTTTTTT +GGGGGGAGG +AAAAAAAAA +TTGTGGTTT +TTTTTTTT- +AAAAAAAA- +TTTTTTTT- +GGG-GGG-- +TTATAATT- +CCCCGCCC- diff --git a/tools/taffy/test-data/testTaf.taf.gz b/tools/taffy/test-data/testTaf.taf.gz new file mode 100644 index 00000000000..995f61330a1 Binary files /dev/null and b/tools/taffy/test-data/testTaf.taf.gz differ