From 95a2be8461f4523f782f65fd36d0103ff1f47f96 Mon Sep 17 00:00:00 2001 From: shradhasaraf Date: Fri, 11 Jul 2025 11:11:20 +0100 Subject: [PATCH 1/2] Adding scripts to merge genes --- scripts/add_info.py | 75 +++++++++++++ scripts/merge_genes.py | 234 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100644 scripts/add_info.py create mode 100644 scripts/merge_genes.py diff --git a/scripts/add_info.py b/scripts/add_info.py new file mode 100644 index 000000000..68ee4b053 --- /dev/null +++ b/scripts/add_info.py @@ -0,0 +1,75 @@ +import os +import re + +file=open("sorted_combined.gff3","r") +lines=file.readlines() + +output=open("full_gff.gff3", "w") +list_id=[] +list_mrna=[] +main_id="" + + +for line in lines: + line=line.strip() + if line.startswith('Chr'): + last_column=line.split("\t")[8] + list_id=line.split("\t")[8].split(",") + flag1=flag2=flag3="" + for i in list_id: + if i.startswith('ID=Os'): + flag1=i + elif i.startswith('ID=gene:'): + flag2=i + elif i.startswith('ID=LOC'): + flag3=i + if flag1: + main_id=flag1 + + if flag1=="": + if flag2: + main_id=flag2 + else: + main_id=flag3 + main_id=main_id.replace("ID=","") + + new_last_column="ID="+str(main_id)+";Name="+str(main_id) + new_gene=line.replace(last_column,new_last_column) + output.write(new_gene) + output.write("\n") + + + for gene_id in list_id: + gene_id=gene_id.replace("ID=","") + if gene_id.startswith('Os'): + cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_RAPDB.gff" + if gene_id.startswith('LOC_Os'): + cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_MSU.gff" + if gene_id.startswith('gene:Osativa'): + cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_nipponbare.gff" + match=os.popen(cmd).read() + list_mrna=re.split("\n",match) + list_mrna.remove(list_mrna[-1]) + for mrna in list_mrna: + mrna_id=mrna.split("ID=")[1].split(";Name=")[0].split(";Parent=")[0] + parent_id=mrna.split("Parent=")[1] + old_mrna_parent="Parent=" 
+str(parent_id) + new_mrna_parent="Parent=" + str(main_id) + new_mrna=mrna.replace(old_mrna_parent,new_mrna_parent) + if mrna_id.startswith('Os'): + cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_RAPDB.gff" + if mrna_id.startswith('LOC_Os'): + cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_MSU.gff" + if mrna_id.startswith('transcript:Osativa'): + cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_nipponbare.gff" + exons_match=os.popen(cmd2).read() + output.write(new_mrna) + output.write("\n") + output.write(exons_match) + + + + + + + diff --git a/scripts/merge_genes.py b/scripts/merge_genes.py new file mode 100644 index 000000000..896436463 --- /dev/null +++ b/scripts/merge_genes.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Merge Genes script. + +This script aims to merge the genes from Nipponbare 3 annotation sources (RAPDB, MSU and Gramene). + +Below are some of the criteria to consider: +1. (plus) and (minus) strand genes are considered separately +2. The genes are considered as overlapping if the overlap length of the sequence is >=50% of the length of the smallest line. 
+ +""" +import argparse +import logging +from pathlib import Path + +import pandas + +from ensembl.utils import StrPath +from ensembl.utils.argparse import ArgumentParser +from ensembl.utils.logging import init_logging_with_args + + +def update_gene_info( + update_gene: dict, + id_list: list, + merged_chromosome: str, + merged_gene_start: str, + merged_gene_end: str, + merged_strand: str, +) -> dict: + """ + Takes the id_list, other parameters from main, update the gene information and returns to the main + + Args: + update_gene (dict): column names as keys and corresponding parameters are values in teh form of list + id_list (list): A list of ids from genes to merge + merged_chromosome: Chromosome number from the merged gene + merged_gene_start: New gene start coordinate after merging the gene + merged_gene_end: New gene end coordinate after merging the gene + merged_strand: Strand from the merged genes + + Returns: + The updated gene. + """ + update_gene["chr"].append(merged_chromosome) + update_gene["source"].append("panoryza") + update_gene["gene"].append("gene") + update_gene["start"].append(merged_gene_start) + update_gene["end"].append(merged_gene_end) + update_gene["score"].append(".") + update_gene["strand"].append(merged_strand) + update_gene["frame"].append(".") + merge_ids = ",".join(id_list) + update_gene["attribute"].append(merge_ids) + return update_gene + + +def sort_genes(output_file: dict) -> pandas.DataFrame: + """Sort genes. + + Args: + output_file: gene information in a dictionary + Returns: + A sorted gene matrix. + + """ + gene_matrix = pandas.DataFrame.from_dict(output_file) + gene_matrix["chr_no"] = gene_matrix["chr"].str.extract("(\d+)", expand=False).astype(int) + gene_matrix.sort_values(by=["chr_no", "start"], inplace=True) + gene_matrix.drop("chr_no", axis=1, inplace=True) + return gene_matrix + + +def merge_genes(input_gff_file: StrPath) -> dict: + """Merge genes from the input GFF file. 
+ + Args: + input_gff_file: Path to the GFF file. + + Returns: + The merged genes in a dictionary. + """ + + list_id = [] + dict_merged_gene = { + "chr": [], + "source": [], + "gene": [], + "start": [], + "end": [], + "score": [], + "strand": [], + "frame": [], + "attribute": [], + } + + logging.info("The input file is " + str(input_gff_file)) + + with open(input_gff_file) as gff_file: + lines = gff_file.readlines() + ## if line is starting with 'Chr' then split it and assign to different variables + for line in lines: + if line.startswith("#"): + continue + chromo, _, _, start, end, _, strand, _, description = line.split("\t") + gene_id = description.split(";Name=")[0].split(";biotype=")[0] + start = int(start) + end = int(end) + + ### Assign the values for the first gene in the gff + if line == lines[0]: + gene_start = start + gene_end = end + current_chromosome = str(line.split("\t")[0]) + current_strand = str(line.split("\t")[6]) + + ## Checking if the strand and chromosome are same between the previous and current line; otherwise print and start with the new gene + if strand != current_strand or chromo != current_chromosome: + dict_merged_gene = update_gene_info( + dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand + ) + current_strand = strand + gene_start = start + gene_end = end + current_chromosome = chromo + list_id = [] + + ## if start of the next gene is lesser than the current gene, raise exception + if start < gene_start: + raise Exception("Error: File is not sorted properly") + + ## if start coordinate of a gene is equal or greater than the previous AND if start coordinate is lesser than the gene end then start merging the genes + elif (start == gene_start or start > gene_start) and start < gene_end: + ## if end coord of current gene is lesser than the previous, then merge the gene into the previous one + if end <= gene_end: + list_id.append(gene_id) + + ## if end coord of current gene is greater than the previous, 
then calculate the length of both genes and check overlap criteria + elif end > gene_end: + gene_length = gene_end - gene_start + length = end - start + min_length = min(length, gene_length) + overlap_length = gene_end - start + + ## Checking the overlap criteria + if int(overlap_length) * 2 >= int(min_length): + gene_end = end + list_id.append(gene_id) + logging.info("overlap\n") + else: + dict_merged_gene = update_gene_info( + dict_merged_gene, + list_id, + current_chromosome, + gene_start, + gene_end, + current_strand, + ) + list_id = [] + gene_start = start + gene_end = end + list_id.append(gene_id) + + ## if start coord of a gene is greater than the end coord of the previous gene then print the previous gene (current gene will not merge into that) + if start >= gene_end: + dict_merged_gene = update_gene_info( + dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand + ) + list_id = [] + gene_start = start + gene_end = end + list_id.append(gene_id) + + ## If last line in the file, print the gene(s) + if line == lines[-1]: + dict_merged_gene = update_gene_info( + dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand + ) + return dict_merged_gene + + +def parse_args(arg_list: list[str] | None) -> argparse.Namespace: + """Parse the aruments from the command line. + + Args: + arg_list: List of arguments to parse. If `None`, grab them from the command line. + + Returns: + Parsed arguments. + + """ + parser = ArgumentParser(description="Merge genes script") + parser.add_argument_src_path( + "--input_gff_file", + default="sorted_all_line.gff", + help="Input file with gene info from RAPDB, MSU and Gramene", + ) + parser.add_argument_dst_path( + "--output_dir", default=".", help="Output directory for the merged genes file" + ) + parser.add_log_arguments() + return parser.parse_args(arg_list) + + +def main(arg_list: list[str] | None = None) -> None: + """Main script entry point. 
+ + Args: + arg_list: Arguments to parse passing list to `parse_args()`. + """ + args = parse_args(arg_list) + init_logging_with_args(args) + + merge_genes_metadata = merge_genes(args.input_gff_file) + sorted_genes = sort_genes(merge_genes_metadata) + sorted_genes.to_csv(args.output_dir / "merged_genes.tsv", sep="\t", index=False) + + +if __name__ == "__main__": + main() From 0588d8482826aa08ca18ae93fbf605742d9d32d8 Mon Sep 17 00:00:00 2001 From: shradhasaraf Date: Mon, 14 Jul 2025 17:40:51 +0100 Subject: [PATCH 2/2] Added README and other files --- scripts/README.md | 18 +++ scripts/add_info.py | 15 +-- scripts/merged_genes.gff | 248 ++++++++++++++++++++++++++++++++++++ scripts/merged_genes.tsv | 5 + scripts/sorted_all_gene.gff | 10 ++ 5 files changed, 285 insertions(+), 11 deletions(-) create mode 100644 scripts/README.md create mode 100644 scripts/merged_genes.gff create mode 100644 scripts/merged_genes.tsv create mode 100644 scripts/sorted_all_gene.gff diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000..a8a498f53 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,18 @@ +The script merge_genes.py merges genes from the three Nipponbare annotation sources (Gramene, MSU and RAPDB) based on a combined GFF (obtained by merging the GFF files of the three annotations, namely oryza_sativa_RAPDB.gff, oryza_sativa_MSU.gff and oryza_sativa_gramene.gff). + +Input: sorted_all_gene.gff; this file contains the genes from all three annotation sources, sorted by chromosome number and position. +Output: merged_genes.tsv, which contains the merged genes in TSV format. + +Script options can be explored with the `--help` option. + +`python merge_genes.py --help` + +For example (`--output_dir` is a directory; merged_genes.tsv is written inside it): +`python merge_genes.py --input_gff_file sorted_all_gene.gff --output_dir /homes/user` + +After running merge_genes.py, the mRNA, CDS, exon, etc. records are added for each gene by the script add_info.py. 
+ +Input: merged_genes.tsv, oryza_sativa_RAPDB.gff, oryza_sativa_MSU.gff and oryza_sativa_gramene.gff +Output: merged_genes.gff + +Usage: `python add_info.py` diff --git a/scripts/add_info.py b/scripts/add_info.py index 68ee4b053..16b71765f 100644 --- a/scripts/add_info.py +++ b/scripts/add_info.py @@ -1,10 +1,10 @@ import os import re -file=open("sorted_combined.gff3","r") +file=open("merged_genes.tsv","r") lines=file.readlines() -output=open("full_gff.gff3", "w") +output=open("merged_genes.gff", "w") list_id=[] list_mrna=[] main_id="" @@ -46,7 +46,7 @@ if gene_id.startswith('LOC_Os'): cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_MSU.gff" if gene_id.startswith('gene:Osativa'): - cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_nipponbare.gff" + cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_gramene.gff" match=os.popen(cmd).read() list_mrna=re.split("\n",match) list_mrna.remove(list_mrna[-1]) @@ -61,15 +61,8 @@ if mrna_id.startswith('LOC_Os'): cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_MSU.gff" if mrna_id.startswith('transcript:Osativa'): - cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_nipponbare.gff" + cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_gramene.gff" exons_match=os.popen(cmd2).read() output.write(new_mrna) output.write("\n") output.write(exons_match) - - - - - - - diff --git a/scripts/merged_genes.gff b/scripts/merged_genes.gff new file mode 100644 index 000000000..41d9e716e --- /dev/null +++ b/scripts/merged_genes.gff @@ -0,0 +1,248 @@ +Chr1 panoryza gene 2903 10817 . + . ID=Os01g0100100;Name=Os01g0100100 +Chr1 MSU_osa1r7 mRNA 2903 10817 . + . ID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=Os01g0100100 +Chr1 MSU_osa1r7 exon 2903 3268 . + . ID=LOC_Os01g01010.1:exon_1;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 3354 3616 . + . ID=LOC_Os01g01010.1:exon_2;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 4357 4455 . + . 
ID=LOC_Os01g01010.1:exon_3;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 5457 5560 . + . ID=LOC_Os01g01010.1:exon_4;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 7136 7944 . + . ID=LOC_Os01g01010.1:exon_5;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 8028 8150 . + . ID=LOC_Os01g01010.1:exon_6;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 8232 8320 . + . ID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 8408 8608 . + . ID=LOC_Os01g01010.1:exon_8;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 9210 9617 . + . ID=LOC_Os01g01010.1:exon_9;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 10104 10187 . + . ID=LOC_Os01g01010.1:exon_10;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 10274 10430 . + . ID=LOC_Os01g01010.1:exon_11;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 exon 10504 10817 . + . ID=LOC_Os01g01010.1:exon_12;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 five_prime_UTR 2903 3268 . + . ID=LOC_Os01g01010.1:utr_1;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 five_prime_UTR 3354 3448 . + . ID=LOC_Os01g01010.1:utr_2;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 3449 3616 . + . ID=LOC_Os01g01010.1:cds_1;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 4357 4455 . + . ID=LOC_Os01g01010.1:cds_2;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 5457 5560 . + . ID=LOC_Os01g01010.1:cds_3;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 7136 7944 . + . ID=LOC_Os01g01010.1:cds_4;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 8028 8150 . + . ID=LOC_Os01g01010.1:cds_5;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 8232 8320 . + . ID=LOC_Os01g01010.1:cds_6;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 8408 8608 . + . ID=LOC_Os01g01010.1:cds_7;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 9210 9617 . + . ID=LOC_Os01g01010.1:cds_8;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 10104 10187 . + . ID=LOC_Os01g01010.1:cds_9;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 CDS 10274 10297 . + . ID=LOC_Os01g01010.1:cds_10;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 three_prime_UTR 10298 10430 . + . 
ID=LOC_Os01g01010.1:utr_3;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 three_prime_UTR 10504 10817 . + . ID=LOC_Os01g01010.1:utr_4;Parent=LOC_Os01g01010.1 +Chr1 MSU_osa1r7 mRNA 2984 10562 . + . ID=LOC_Os01g01010.2;Name=LOC_Os01g01010.2;Parent=Os01g0100100 +Chr1 MSU_osa1r7 exon 2984 3255 . + . ID=LOC_Os01g01010.2:exon_1;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 3354 3616 . + . ID=LOC_Os01g01010.2:exon_2;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 4357 4455 . + . ID=LOC_Os01g01010.2:exon_3;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 5457 5560 . + . ID=LOC_Os01g01010.2:exon_4;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 7136 7944 . + . ID=LOC_Os01g01010.2:exon_5;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 8028 8150 . + . ID=LOC_Os01g01010.2:exon_6;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 8232 8320 . + . ID=LOC_Os01g01010.2:exon_7;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 8408 8608 . + . ID=LOC_Os01g01010.2:exon_8;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 9210 9311 . + . ID=LOC_Os01g01010.2:exon_9;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 10104 10187 . + . ID=LOC_Os01g01010.2:exon_10;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 10232 10244 . + . ID=LOC_Os01g01010.2:exon_11;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 exon 10504 10562 . + . ID=LOC_Os01g01010.2:exon_12;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 five_prime_UTR 2984 3255 . + . ID=LOC_Os01g01010.2:utr_1;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 five_prime_UTR 3354 3448 . + . ID=LOC_Os01g01010.2:utr_2;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 3449 3616 . + . ID=LOC_Os01g01010.2:cds_1;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 4357 4455 . + . ID=LOC_Os01g01010.2:cds_2;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 5457 5560 . + . ID=LOC_Os01g01010.2:cds_3;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 7136 7944 . + . ID=LOC_Os01g01010.2:cds_4;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 8028 8150 . + . ID=LOC_Os01g01010.2:cds_5;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 8232 8320 . + . 
ID=LOC_Os01g01010.2:cds_6;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 8408 8608 . + . ID=LOC_Os01g01010.2:cds_7;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 9210 9311 . + . ID=LOC_Os01g01010.2:cds_8;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 10104 10187 . + . ID=LOC_Os01g01010.2:cds_9;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 10232 10244 . + . ID=LOC_Os01g01010.2:cds_10;Parent=LOC_Os01g01010.2 +Chr1 MSU_osa1r7 CDS 10504 10562 . + . ID=LOC_Os01g01010.2:cds_11;Parent=LOC_Os01g01010.2 +1 NAM mRNA 2976 9671 . + . ID=transcript:Osativa.01G000010_01;Parent=Os01g0100100 +1 NAM five_prime_UTR 2976 3268 . + . Parent=transcript:Osativa.01G000010_01 +1 NAM five_prime_UTR 3354 3448 . + . Parent=transcript:Osativa.01G000010_01 +1 NAM exon 2976 3268 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000010_01.exon.1;rank=1 +1 NAM exon 3354 3616 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.2;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.2;rank=2 +1 NAM exon 4357 4455 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.3;rank=3 +1 NAM exon 5457 5560 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000010_01.exon.4;rank=4 +1 NAM exon 7136 7944 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.5;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_01.exon.5;rank=5 +1 NAM exon 8028 8150 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.6;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_01.exon.6;rank=6 +1 NAM exon 8232 8320 . + . 
Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.7;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.7;rank=7 +1 NAM exon 8408 8629 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.8;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.8;rank=8 +1 NAM exon 9210 9671 . + . Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.9;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.9;rank=9 +1 NAM CDS 3449 3616 . + 0 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 4357 4455 . + 0 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 5457 5560 . + 0 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 7136 7944 . + 1 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 8028 8150 . + 2 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 8232 8320 . + 2 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 8408 8629 . + 0 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM CDS 9210 9671 . + 0 ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01 +1 NAM mRNA 2976 9671 . + . ID=transcript:Osativa.01G000010_02;Parent=Os01g0100100 +1 NAM five_prime_UTR 2976 3268 . + . Parent=transcript:Osativa.01G000010_02 +1 NAM five_prime_UTR 3354 3448 . + . Parent=transcript:Osativa.01G000010_02 +1 NAM exon 2976 3268 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000010_02.exon.1;rank=1 +1 NAM exon 3354 3616 . + . 
Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.2;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.2;rank=2 +1 NAM exon 4357 4455 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.3;rank=3 +1 NAM exon 5457 5560 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000010_02.exon.4;rank=4 +1 NAM exon 7136 7944 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.5;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_02.exon.5;rank=5 +1 NAM exon 8028 8150 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.6;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_02.exon.6;rank=6 +1 NAM exon 8232 8320 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.7;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.7;rank=7 +1 NAM exon 8408 8608 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.8;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.8;rank=8 +1 NAM exon 9210 9671 . + . Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.9;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.9;rank=9 +1 NAM CDS 3449 3616 . + 0 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 4357 4455 . + 0 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 5457 5560 . + 0 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 7136 7944 . + 1 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 8028 8150 . 
+ 2 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 8232 8320 . + 2 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 8408 8608 . + 0 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +1 NAM CDS 9210 9671 . + 0 ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02 +chr01 irgsp1_rep mRNA 2983 10815 . + . ID=Os01t0100100-01;Name=Os01t0100100-01;Parent=Os01g0100100 +chr01 irgsp1_rep exon 2983 3268 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 3354 3616 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 4357 4455 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 5457 5560 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 7136 7944 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 8028 8150 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 8232 8320 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 8408 8608 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 9210 9615 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 10102 10187 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 10274 10430 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep exon 10504 10815 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep five_prime_UTR 2983 3268 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep five_prime_UTR 3354 3448 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 3449 3616 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 4357 4455 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 5457 5560 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 7136 7944 . + 1 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 8028 8150 . + 2 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 8232 8320 . + 2 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 8408 8608 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 9210 9615 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 10102 10187 . 
+ 2 Parent=Os01t0100100-01 +chr01 irgsp1_rep CDS 10274 10297 . + 0 Parent=Os01t0100100-01 +chr01 irgsp1_rep three_prime_UTR 10298 10430 . + . Parent=Os01t0100100-01 +chr01 irgsp1_rep three_prime_UTR 10504 10815 . + . Parent=Os01t0100100-01 +Chr1 panoryza gene 11218 12435 . + . ID=Os01g0100200;Name=Os01g0100200 +chr01 irgsp1_rep mRNA 11218 12435 . + . ID=Os01t0100200-01;Name=Os01t0100200-01;Parent=Os01g0100200 +chr01 irgsp1_rep exon 11218 12060 . + . Parent=Os01t0100200-01 +chr01 irgsp1_rep exon 12152 12435 . + . Parent=Os01t0100200-01 +chr01 irgsp1_rep five_prime_UTR 11218 11797 . + . Parent=Os01t0100200-01 +chr01 irgsp1_rep CDS 11798 12060 . + 0 Parent=Os01t0100200-01 +chr01 irgsp1_rep CDS 12152 12317 . + 1 Parent=Os01t0100200-01 +chr01 irgsp1_rep three_prime_UTR 12318 12435 . + . Parent=Os01t0100200-01 +Chr1 MSU_osa1r7 mRNA 11218 12435 . + . ID=LOC_Os01g01019.1;Name=LOC_Os01g01019.1;Parent=Os01g0100200 +Chr1 MSU_osa1r7 exon 11218 12060 . + . ID=LOC_Os01g01019.1:exon_1;Parent=LOC_Os01g01019.1 +Chr1 MSU_osa1r7 exon 12152 12435 . + . ID=LOC_Os01g01019.1:exon_2;Parent=LOC_Os01g01019.1 +Chr1 MSU_osa1r7 five_prime_UTR 11218 11797 . + . ID=LOC_Os01g01019.1:utr_1;Parent=LOC_Os01g01019.1 +Chr1 MSU_osa1r7 CDS 11798 12060 . + . ID=LOC_Os01g01019.1:cds_1;Parent=LOC_Os01g01019.1 +Chr1 MSU_osa1r7 CDS 12152 12317 . + . ID=LOC_Os01g01019.1:cds_2;Parent=LOC_Os01g01019.1 +Chr1 MSU_osa1r7 three_prime_UTR 12318 12435 . + . ID=LOC_Os01g01019.1:utr_2;Parent=LOC_Os01g01019.1 +1 NAM mRNA 11371 12435 . + . ID=transcript:Osativa.01G000020_01;Parent=Os01g0100200 +1 NAM five_prime_UTR 11371 11797 . + . Parent=transcript:Osativa.01G000020_01 +1 NAM exon 11371 12060 . + . Parent=transcript:Osativa.01G000020_01;Name=Osativa.01G000020_01.exon.1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000020_01.exon.1;rank=1 +1 NAM exon 12152 12435 . + . 
Parent=transcript:Osativa.01G000020_01;Name=Osativa.01G000020_01.exon.2;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000020_01.exon.2;rank=2 +1 NAM CDS 11798 12060 . + 0 ID=CDS:Osativa.01G000020_01;Parent=transcript:Osativa.01G000020_01;protein_id=Osativa.01G000020_01 +1 NAM CDS 12152 12317 . + 1 ID=CDS:Osativa.01G000020_01;Parent=transcript:Osativa.01G000020_01;protein_id=Osativa.01G000020_01 +1 NAM three_prime_UTR 12318 12435 . + . Parent=transcript:Osativa.01G000020_01 +Chr1 panoryza gene 12648 15915 . + . ID=Os01g0100400;Name=Os01g0100400 +Chr1 MSU_osa1r7 mRNA 12648 15915 . + . ID=LOC_Os01g01030.1;Name=LOC_Os01g01030.1;Parent=Os01g0100400 +Chr1 MSU_osa1r7 exon 12648 13813 . + . ID=LOC_Os01g01030.1:exon_1;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 exon 13906 14271 . + . ID=LOC_Os01g01030.1:exon_2;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 exon 14359 14437 . + . ID=LOC_Os01g01030.1:exon_3;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 exon 14969 15171 . + . ID=LOC_Os01g01030.1:exon_4;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 exon 15266 15915 . + . ID=LOC_Os01g01030.1:exon_5;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 five_prime_UTR 12648 12773 . + . ID=LOC_Os01g01030.1:utr_1;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 CDS 12774 13813 . + . ID=LOC_Os01g01030.1:cds_1;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 CDS 13906 14271 . + . ID=LOC_Os01g01030.1:cds_2;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 CDS 14359 14437 . + . ID=LOC_Os01g01030.1:cds_3;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 CDS 14969 15171 . + . ID=LOC_Os01g01030.1:cds_4;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 CDS 15266 15359 . + . ID=LOC_Os01g01030.1:cds_5;Parent=LOC_Os01g01030.1 +Chr1 MSU_osa1r7 three_prime_UTR 15360 15915 . + . ID=LOC_Os01g01030.1:utr_2;Parent=LOC_Os01g01030.1 +chr01 irgsp1_rep mRNA 12721 15685 . + . ID=Os01t0100400-01;Name=Os01t0100400-01;Parent=Os01g0100400 +chr01 irgsp1_rep exon 12721 13813 . + . Parent=Os01t0100400-01 +chr01 irgsp1_rep exon 13906 14271 . + . 
Parent=Os01t0100400-01 +chr01 irgsp1_rep exon 14359 14437 . + . Parent=Os01t0100400-01 +chr01 irgsp1_rep exon 14969 15171 . + . Parent=Os01t0100400-01 +chr01 irgsp1_rep exon 15266 15685 . + . Parent=Os01t0100400-01 +chr01 irgsp1_rep five_prime_UTR 12721 12773 . + . Parent=Os01t0100400-01 +chr01 irgsp1_rep CDS 12774 13813 . + 0 Parent=Os01t0100400-01 +chr01 irgsp1_rep CDS 13906 14271 . + 1 Parent=Os01t0100400-01 +chr01 irgsp1_rep CDS 14359 14437 . + 1 Parent=Os01t0100400-01 +chr01 irgsp1_rep CDS 14969 15171 . + 0 Parent=Os01t0100400-01 +chr01 irgsp1_rep CDS 15266 15359 . + 1 Parent=Os01t0100400-01 +chr01 irgsp1_rep three_prime_UTR 15360 15685 . + . Parent=Os01t0100400-01 +1 NAM mRNA 12721 15915 . + . ID=transcript:Osativa.01G000040_01;Parent=Os01g0100400 +1 NAM five_prime_UTR 12721 12773 . + . Parent=transcript:Osativa.01G000040_01 +1 NAM exon 12721 13813 . + . Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.1;rank=1 +1 NAM exon 13906 14271 . + . Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.2;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.2;rank=2 +1 NAM exon 14359 14437 . + . Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000040_01.exon.3;rank=3 +1 NAM exon 14969 15171 . + . Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.4;rank=4 +1 NAM exon 15266 15915 . + . Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.5;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000040_01.exon.5;rank=5 +1 NAM CDS 12774 13813 . + 0 ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01 +1 NAM CDS 13906 14271 . 
+ 1 ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01 +1 NAM CDS 14359 14437 . + 1 ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01 +1 NAM CDS 14969 15171 . + 0 ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01 +1 NAM CDS 15266 15359 . + 1 ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01 +1 NAM three_prime_UTR 15360 15915 . + . Parent=transcript:Osativa.01G000040_01 +Chr1 panoryza gene 16292 20323 . + . ID=LOC_Os01g01040;Name=LOC_Os01g01040 +Chr1 MSU_osa1r7 mRNA 16292 20323 . + . ID=LOC_Os01g01040.1;Name=LOC_Os01g01040.1;Parent=LOC_Os01g01040 +Chr1 MSU_osa1r7 exon 16292 16976 . + . ID=LOC_Os01g01040.1:exon_1;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 17383 17474 . + . ID=LOC_Os01g01040.1:exon_2;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 17558 18258 . + . ID=LOC_Os01g01040.1:exon_3;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 18501 18571 . + . ID=LOC_Os01g01040.1:exon_4;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 18968 19057 . + . ID=LOC_Os01g01040.1:exon_5;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 19142 19321 . + . ID=LOC_Os01g01040.1:exon_6;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 19531 19629 . + . ID=LOC_Os01g01040.1:exon_7;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 exon 19734 20323 . + . ID=LOC_Os01g01040.1:exon_8;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 five_prime_UTR 16292 16598 . + . ID=LOC_Os01g01040.1:utr_1;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 16599 16976 . + . ID=LOC_Os01g01040.1:cds_1;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 17383 17474 . + . ID=LOC_Os01g01040.1:cds_2;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 17558 18258 . + . ID=LOC_Os01g01040.1:cds_3;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 18501 18571 . + . ID=LOC_Os01g01040.1:cds_4;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 18968 19057 . + . 
ID=LOC_Os01g01040.1:cds_5;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 19142 19321 . + . ID=LOC_Os01g01040.1:cds_6;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 CDS 19531 19593 . + . ID=LOC_Os01g01040.1:cds_7;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 three_prime_UTR 19594 19629 . + . ID=LOC_Os01g01040.1:utr_2;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 three_prime_UTR 19734 20323 . + . ID=LOC_Os01g01040.1:utr_3;Parent=LOC_Os01g01040.1 +Chr1 MSU_osa1r7 mRNA 16321 20323 . + . ID=LOC_Os01g01040.2;Name=LOC_Os01g01040.2;Parent=LOC_Os01g01040 +Chr1 MSU_osa1r7 exon 16321 16976 . + . ID=LOC_Os01g01040.2:exon_1;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 17383 17474 . + . ID=LOC_Os01g01040.2:exon_2;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 17558 18258 . + . ID=LOC_Os01g01040.2:exon_3;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 18501 18571 . + . ID=LOC_Os01g01040.2:exon_4;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 18968 19057 . + . ID=LOC_Os01g01040.2:exon_5;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 19142 19321 . + . ID=LOC_Os01g01040.2:exon_6;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 19531 19629 . + . ID=LOC_Os01g01040.2:exon_7;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 exon 19731 20323 . + . ID=LOC_Os01g01040.2:exon_8;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 five_prime_UTR 16321 16598 . + . ID=LOC_Os01g01040.2:utr_1;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 16599 16976 . + . ID=LOC_Os01g01040.2:cds_1;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 17383 17474 . + . ID=LOC_Os01g01040.2:cds_2;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 17558 18258 . + . ID=LOC_Os01g01040.2:cds_3;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 18501 18571 . + . ID=LOC_Os01g01040.2:cds_4;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 18968 19057 . + . ID=LOC_Os01g01040.2:cds_5;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 19142 19321 . + . ID=LOC_Os01g01040.2:cds_6;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 CDS 19531 19593 . + . 
ID=LOC_Os01g01040.2:cds_7;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 three_prime_UTR 19594 19629 . + . ID=LOC_Os01g01040.2:utr_2;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 three_prime_UTR 19731 20323 . + . ID=LOC_Os01g01040.2:utr_3;Parent=LOC_Os01g01040.2 +Chr1 MSU_osa1r7 mRNA 16321 20323 . + . ID=LOC_Os01g01040.3;Name=LOC_Os01g01040.3;Parent=LOC_Os01g01040 +Chr1 MSU_osa1r7 exon 16321 16976 . + . ID=LOC_Os01g01040.3:exon_1;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 17383 17474 . + . ID=LOC_Os01g01040.3:exon_2;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 17558 18258 . + . ID=LOC_Os01g01040.3:exon_3;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 18501 18571 . + . ID=LOC_Os01g01040.3:exon_4;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 18968 19057 . + . ID=LOC_Os01g01040.3:exon_5;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 19142 19321 . + . ID=LOC_Os01g01040.3:exon_6;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 exon 19531 20323 . + . ID=LOC_Os01g01040.3:exon_7;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 five_prime_UTR 16321 16598 . + . ID=LOC_Os01g01040.3:utr_1;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 16599 16976 . + . ID=LOC_Os01g01040.3:cds_1;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 17383 17474 . + . ID=LOC_Os01g01040.3:cds_2;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 17558 18258 . + . ID=LOC_Os01g01040.3:cds_3;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 18501 18571 . + . ID=LOC_Os01g01040.3:cds_4;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 18968 19057 . + . ID=LOC_Os01g01040.3:cds_5;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 19142 19321 . + . ID=LOC_Os01g01040.3:cds_6;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 CDS 19531 19593 . + . ID=LOC_Os01g01040.3:cds_7;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 three_prime_UTR 19594 20323 . + . ID=LOC_Os01g01040.3:utr_2;Parent=LOC_Os01g01040.3 +Chr1 MSU_osa1r7 mRNA 16292 18304 . + . ID=LOC_Os01g01040.4;Name=LOC_Os01g01040.4;Parent=LOC_Os01g01040 +Chr1 MSU_osa1r7 exon 16292 16976 . + . 
ID=LOC_Os01g01040.4:exon_1;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 exon 17383 17474 . + . ID=LOC_Os01g01040.4:exon_2;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 exon 17558 18304 . + . ID=LOC_Os01g01040.4:exon_3;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 five_prime_UTR 16292 16598 . + . ID=LOC_Os01g01040.4:utr_1;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 CDS 16599 16976 . + . ID=LOC_Os01g01040.4:cds_1;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 CDS 17383 17474 . + . ID=LOC_Os01g01040.4:cds_2;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 CDS 17558 18272 . + . ID=LOC_Os01g01040.4:cds_3;Parent=LOC_Os01g01040.4 +Chr1 MSU_osa1r7 three_prime_UTR 18273 18304 . + . ID=LOC_Os01g01040.4:utr_2;Parent=LOC_Os01g01040.4 diff --git a/scripts/merged_genes.tsv b/scripts/merged_genes.tsv new file mode 100644 index 000000000..696f97e40 --- /dev/null +++ b/scripts/merged_genes.tsv @@ -0,0 +1,5 @@ +chr source gene start end score strand frame attribute +Chr1 panoryza gene 2903 10817 . + . ID=LOC_Os01g01010,ID=gene:Osativa.01G000010,ID=Os01g0100100 +Chr1 panoryza gene 11218 12435 . + . ID=Os01g0100200,ID=LOC_Os01g01019,ID=gene:Osativa.01G000020 +Chr1 panoryza gene 12648 15915 . + . ID=LOC_Os01g01030,ID=Os01g0100400,ID=gene:Osativa.01G000040 +Chr1 panoryza gene 16292 20323 . + . ID=LOC_Os01g01040 diff --git a/scripts/sorted_all_gene.gff b/scripts/sorted_all_gene.gff new file mode 100644 index 000000000..30fee71c1 --- /dev/null +++ b/scripts/sorted_all_gene.gff @@ -0,0 +1,10 @@ +Chr1 MSU_osa1r7 gene 2903 10817 . + . ID=LOC_Os01g01010;Name=LOC_Os01g01010;Note=TBC%20domain%20containing%20protein%2C%20expressed +Chr1 NAM gene 2976 9671 . + . ID=gene:Osativa.01G000010;biotype=protein_coding;logic_name=oryza_cshl +Chr1 irgsp1_locus gene 2983 10815 . + . ID=Os01g0100100;Name=Os01g0100100;Note=RabGAP/TBC domain containing protein. (Os01t0100100-01);Transcript variants=Os01t0100100-01 +Chr1 irgsp1_locus gene 11218 12435 . + . ID=Os01g0100200;Name=Os01g0100200;Note=Conserved hypothetical protein. 
(Os01t0100200-01);Transcript variants=Os01t0100200-01 +Chr1 MSU_osa1r7 gene 11218 12435 . + . ID=LOC_Os01g01019;Name=LOC_Os01g01019;Note=expressed%20protein +Chr1 NAM gene 11371 12435 . + . ID=gene:Osativa.01G000020;biotype=protein_coding;logic_name=oryza_cshl +Chr1 MSU_osa1r7 gene 12648 15915 . + . ID=LOC_Os01g01030;Name=LOC_Os01g01030;Note=monocopper%20oxidase%2C%20putative%2C%20expressed +Chr1 irgsp1_locus gene 12721 15685 . + . ID=Os01g0100400;Name=Os01g0100400;Note=Similar to Pectinesterase-like protein. (Os01t0100400-01);Transcript variants=Os01t0100400-01 +Chr1 NAM gene 12721 15915 . + . ID=gene:Osativa.01G000040;biotype=protein_coding;logic_name=oryza_cshl +Chr1 MSU_osa1r7 gene 16292 20323 . + . ID=LOC_Os01g01040;Name=LOC_Os01g01040;Note=expressed%20protein