diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index cd5bf1306caa..6c57e33e8e10 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -31,6 +31,7 @@ + @@ -1412,6 +1413,7 @@ + diff --git a/lib/galaxy/datatypes/test/test.taf b/lib/galaxy/datatypes/test/test.taf new file mode 100644 index 000000000000..54d4ecc86bbd --- /dev/null +++ b/lib/galaxy/datatypes/test/test.taf @@ -0,0 +1,119 @@ +#taf version:1 scoring:N/A +GGGGGGGGG ; i 0 Anc0.Anc0refChr0 0 + 4151 i 1 Anc1.Anc1refChr1 292714 + 296994 i 2 Anc2.Anc2refChr1 5 + 4655 i 3 mr.mrrefChr1 178277 + 182340 i 4 simCow_chr6.simCow.chr6 5045 - 602619 i 5 simDog_chr6.simDog.chr6 589129 + 593897 i 6 simHuman_chr6.simHuman.chr6 597375 + 601863 i 7 simMouse_chr6.simMouse.chr6 630640 + 636262 i 8 simRat_chr6.simRat.chr6 642153 + 647215 +TTTTTTTTT +CCCCGCCCC +AAAAAAAAA +AAAAAAAAA +GGGGGGGGG +CCCCCCCCC +TTTTTTTAT +CCCCCCCTC +AAATAAATT +GGGGGGGGG +TTTTTTTTT +AATATTAAA +GGGCGGGCA +AAAAAGAAA +TTTTTTTTT +AAGAGGAAA +CCCCCCTCG +TTTTTTTTT +GGGAGGGAA +GGGGGGGGG +AAAAAAAAA +TTTTTTTTT +TTTTTTTTT +AAAGGAAGG +GGGGGAGGG +GGGAGGGAA +AAACAAACC +AAAAAAAAA +TTTTCTTTT +TTTTTTTTT +CCCCCCCCC +AAAAAAAAA +TTTTTTTTT +GGGGGGAGG +AAAGAAAGG +GGGAGGGAA +TTTTTTTTT +TTTGTTTGG +AAAAAAAAA +AAAAAGACA +GGGAGGCAA +CCCCCCCAC +TTTTTTTTT +GGGGGGGGG +TTTTTCTTT +AAAGAAAGG +GGGAAGGAC +CCTCGTCCC +CCCTCCCTT +AAAATAAAA ; g 3 1 g 7 1 g 8 1 +AAAAAAAAA ; g 4 1 +CCCCTCCCC +AAAAAAAAA +TTTATTTAA +CCCACCCAA +TTTACTGAA +GGGAGGGAA +AAAAAAGAA ; g 3 2 g 7 2 g 8 2 +TTTATTTAA +AAGAGGAAA +TTTTTTTTT +AAACAAGCC +TTTTT-CTT +TTTTT-TAT +CCCCC-CCC +TTTTT-TTC +CCCTC-ATT +TTTCT-TCC +CCACA-CCC +TTTTT-TTT +AAAAA-AAA +AAATA-ATT +TTTTT-TTT +AAAAA-AAA +TTGTG-TTT +TTTCT-TCC +TTT-T-T-- +TTT-T-A-- +CCC-C-G-- +TTT-T-T-- +AAA-A-A-- +AAA-A-C-- +TTT-T-T-- +CCC-C-C-- +TTT-T-T-- +TTT-T-T-- +AAAA-A ; d 3 d 6 d 6 g 5 1 +GGGG-G +TTTT-T +CCCC-C +GGGG-G +AAAA-A +TTTT-T 
+CCCC-G +TTTT-T +GGGG-G +GGGGGG +GGGGGG +AAAAAA +GGG-GAG-- ; i 3 mr.mrrefChr1 178357 + 182340 g 6 6 i 7 simMouse_chr6.simMouse.chr6 630720 + 636262 i 8 simRat_chr6.simRat.chr6 642233 + 647215 +TTT-TTT-- +AAA-AGA-- +CCC-CCA-- +AAAAAAAAA +GGGGGGGAG +TTTTTTTTT +TTTTCTTTT +TTTTTTTTT +GGGGGGAGG +AAAAAAAAA +TTGTGGTTT +TTTTTTTT- +AAAAAAAA- +TTTTTTTT- +GGG-GGG-- +TTATAATT- +CCCCGCCC- diff --git a/lib/galaxy/datatypes/text.py b/lib/galaxy/datatypes/text.py index e246ad5abf65..84798642c008 100644 --- a/lib/galaxy/datatypes/text.py +++ b/lib/galaxy/datatypes/text.py @@ -1669,3 +1669,33 @@ def _looks_like_sourmash(self, file_prefix: FilePrefix, load_size: int = 5000) - except Exception: pass return False + + +@build_sniff_from_prefix +class Taf(Text): + """ + TAF: a Transposed Alignment Format + + https://github.com/ComparativeGenomicsToolkit/taffy/blob/main/docs/taf_format.md + """ + + file_ext = "taf" + + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: + """ + Determines whether the file is in taf format + + The first line of a .taf file begins with #taf. This word is followed + by white-space-separated 'variable:value' pairs. There should be no white + space surrounding the ':'. + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('test.taf') + >>> Taf().sniff(fname) + True + """ + try: + first_line = next(file_prefix.line_iterator(), "") + except Exception: + return False + return re.search(r"^#taf([ \t]+\S+:\S+)+$", first_line) is not None