Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/galaxy/config/sample/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
<datatype extension="anvio_variability" type="galaxy.datatypes.tabular:TSV" display_in_upload="false" subclass="true" />
<datatype extension="arff" type="galaxy.datatypes.text:Arff" mimetype="text/plain" display_in_upload="true"/>
<datatype extension="paf" auto_compressed_types="gz" type="galaxy.datatypes.text:Paf" mimetype="text/plain" display_in_upload="true"/>
<datatype extension="taf" auto_compressed_types="gz" type="galaxy.datatypes.text:Taf" mimetype="text/plain" display_in_upload="true" description="Transposed Alignment Format. The first line of a .taf file begins with #taf. This word is followed by white-space-separated 'variable:value' pairs. There should be no white space surrounding the ':'." description_url="https://github.com/ComparativeGenomicsToolkit/taffy/blob/main/docs/taf_format.md"/>
<datatype extension="gfa1" auto_compressed_types="gz" type="galaxy.datatypes.text:Gfa1" mimetype="text/plain" display_in_upload="true"/>
<datatype extension="gfa2" auto_compressed_types="gz" type="galaxy.datatypes.text:Gfa2" mimetype="text/plain" display_in_upload="true">
<infer_from suffix="gfa" />
Expand Down Expand Up @@ -1412,6 +1413,7 @@
<sniffer type="galaxy.datatypes.tabular:Pileup"/>
<sniffer type="galaxy.datatypes.tabular:Psl"/>
<sniffer type="galaxy.datatypes.text:Paf"/>
<sniffer type="galaxy.datatypes.text:Taf"/>
<sniffer type="galaxy.datatypes.interval:Interval"/>
<sniffer type="galaxy.datatypes.tabular:FourDNPairs"/>
<sniffer type="galaxy.datatypes.tabular:FourDNPairsam"/>
Expand Down
119 changes: 119 additions & 0 deletions lib/galaxy/datatypes/test/test.taf
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#taf version:1 scoring:N/A
GGGGGGGGG ; i 0 Anc0.Anc0refChr0 0 + 4151 i 1 Anc1.Anc1refChr1 292714 + 296994 i 2 Anc2.Anc2refChr1 5 + 4655 i 3 mr.mrrefChr1 178277 + 182340 i 4 simCow_chr6.simCow.chr6 5045 - 602619 i 5 simDog_chr6.simDog.chr6 589129 + 593897 i 6 simHuman_chr6.simHuman.chr6 597375 + 601863 i 7 simMouse_chr6.simMouse.chr6 630640 + 636262 i 8 simRat_chr6.simRat.chr6 642153 + 647215
TTTTTTTTT
CCCCGCCCC
AAAAAAAAA
AAAAAAAAA
GGGGGGGGG
CCCCCCCCC
TTTTTTTAT
CCCCCCCTC
AAATAAATT
GGGGGGGGG
TTTTTTTTT
AATATTAAA
GGGCGGGCA
AAAAAGAAA
TTTTTTTTT
AAGAGGAAA
CCCCCCTCG
TTTTTTTTT
GGGAGGGAA
GGGGGGGGG
AAAAAAAAA
TTTTTTTTT
TTTTTTTTT
AAAGGAAGG
GGGGGAGGG
GGGAGGGAA
AAACAAACC
AAAAAAAAA
TTTTCTTTT
TTTTTTTTT
CCCCCCCCC
AAAAAAAAA
TTTTTTTTT
GGGGGGAGG
AAAGAAAGG
GGGAGGGAA
TTTTTTTTT
TTTGTTTGG
AAAAAAAAA
AAAAAGACA
GGGAGGCAA
CCCCCCCAC
TTTTTTTTT
GGGGGGGGG
TTTTTCTTT
AAAGAAAGG
GGGAAGGAC
CCTCGTCCC
CCCTCCCTT
AAAATAAAA ; g 3 1 g 7 1 g 8 1
AAAAAAAAA ; g 4 1
CCCCTCCCC
AAAAAAAAA
TTTATTTAA
CCCACCCAA
TTTACTGAA
GGGAGGGAA
AAAAAAGAA ; g 3 2 g 7 2 g 8 2
TTTATTTAA
AAGAGGAAA
TTTTTTTTT
AAACAAGCC
TTTTT-CTT
TTTTT-TAT
CCCCC-CCC
TTTTT-TTC
CCCTC-ATT
TTTCT-TCC
CCACA-CCC
TTTTT-TTT
AAAAA-AAA
AAATA-ATT
TTTTT-TTT
AAAAA-AAA
TTGTG-TTT
TTTCT-TCC
TTT-T-T--
TTT-T-A--
CCC-C-G--
TTT-T-T--
AAA-A-A--
AAA-A-C--
TTT-T-T--
CCC-C-C--
TTT-T-T--
TTT-T-T--
AAAA-A ; d 3 d 6 d 6 g 5 1
GGGG-G
TTTT-T
CCCC-C
GGGG-G
AAAA-A
TTTT-T
CCCC-G
TTTT-T
GGGG-G
GGGGGG
GGGGGG
AAAAAA
GGG-GAG-- ; i 3 mr.mrrefChr1 178357 + 182340 g 6 6 i 7 simMouse_chr6.simMouse.chr6 630720 + 636262 i 8 simRat_chr6.simRat.chr6 642233 + 647215
TTT-TTT--
AAA-AGA--
CCC-CCA--
AAAAAAAAA
GGGGGGGAG
TTTTTTTTT
TTTTCTTTT
TTTTTTTTT
GGGGGGAGG
AAAAAAAAA
TTGTGGTTT
TTTTTTTT-
AAAAAAAA-
TTTTTTTT-
GGG-GGG--
TTATAATT-
CCCCGCCC-
30 changes: 30 additions & 0 deletions lib/galaxy/datatypes/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1669,3 +1669,33 @@ def _looks_like_sourmash(self, file_prefix: FilePrefix, load_size: int = 5000) -
except Exception:
pass
return False


@build_sniff_from_prefix
class Taf(Text):
    """
    TAF: a Transposed Alignment Format

    https://github.com/ComparativeGenomicsToolkit/taffy/blob/main/docs/taf_format.md
    """

    file_ext = "taf"

    def sniff_prefix(self, file_prefix: FilePrefix) -> bool:
        """
        Determines whether the file is in taf format

        The first line of a .taf file begins with #taf. This word is followed
        by white-space-separated 'variable:value' pairs. There should be no white
        space surrounding the ':'.

        Only the first line is inspected. Returns True when it is a ``#taf``
        header followed by at least one ``variable:value`` pair, False otherwise
        (including when the first line cannot be read).

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname('test.taf')
        >>> Taf().sniff(fname)
        True
        """
        try:
            # Default of "" covers an empty prefix: it simply fails the match below.
            first_line = next(file_prefix.line_iterator(), "")
        except Exception:
            # Sniffing is best-effort: any failure while reading the first line
            # means "not TAF" rather than an error for the caller.
            return False
        # Drop the trailing line terminator before matching. `\S` never matches
        # '\r', so without this a header ending in CRLF would be rejected.
        header = first_line.rstrip("\r\n")
        return re.fullmatch(r"#taf(?:[ \t]+\S+:\S+)+", header) is not None
Loading