From 95a2be8461f4523f782f65fd36d0103ff1f47f96 Mon Sep 17 00:00:00 2001
From: shradhasaraf <shradha@ebi.ac.uk>
Date: Fri, 11 Jul 2025 11:11:20 +0100
Subject: [PATCH 1/2] Adding scripts to merge genes

---
 scripts/add_info.py    |  75 +++++++++++++
 scripts/merge_genes.py | 234 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 scripts/add_info.py
 create mode 100644 scripts/merge_genes.py

diff --git a/scripts/add_info.py b/scripts/add_info.py
new file mode 100644
index 000000000..68ee4b053
--- /dev/null
+++ b/scripts/add_info.py
@@ -0,0 +1,75 @@
+import os
+import re
+
+file=open("sorted_combined.gff3","r")    
+lines=file.readlines()
+
+output=open("full_gff.gff3", "w")
+list_id=[]
+list_mrna=[]
+main_id=""
+
+
+for line in lines:  # walk every line read from the input file
+    line=line.strip()
+    if line.startswith('Chr'):  # only lines starting with 'Chr' are processed; anything else is skipped
+        last_column=line.split("\t")[8]  # 9th tab-separated field: the attribute column
+        list_id=line.split("\t")[8].split(",")  # the attribute column is split on "," into individual IDs
+        flag1=flag2=flag3=""  # one candidate main ID per recognised prefix
+        for i in list_id:
+            if i.startswith('ID=Os'):
+                flag1=i
+            elif i.startswith('ID=gene:'):
+                flag2=i
+            elif i.startswith('ID=LOC'):
+                flag3=i
+        if flag1:  # priority for the main ID: 'ID=Os' first, then 'ID=gene:', then 'ID=LOC'
+            main_id=flag1
+        # NOTE(review): if no ID carries a known prefix, main_id silently becomes "" (flag3 is empty)
+        if flag1=="":
+            if flag2:
+                main_id=flag2
+            else:
+                main_id=flag3
+        main_id=main_id.replace("ID=","")
+        # Write the gene line with its attribute column replaced by ID=<main_id>;Name=<main_id>
+        new_last_column="ID="+str(main_id)+";Name="+str(main_id)
+        new_gene=line.replace(last_column,new_last_column)
+        output.write(new_gene)
+        output.write("\n")
+        # For every ID of the merged gene: grep its mRNA lines from the GFF file chosen by the ID
+        # prefix, re-parent them to main_id, and copy the lines matching Parent=<mRNA id> after each
+        for gene_id in list_id:
+            gene_id=gene_id.replace("ID=","")
+            if gene_id.startswith('Os'):  # NOTE(review): no else branch; an ID with an unknown prefix reuses the previous cmd (or fails if none was set)
+                cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_RAPDB.gff"  # NOTE(review): gene_id goes unescaped into a shell command and a regex
+            if gene_id.startswith('LOC_Os'):
+                cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_MSU.gff"
+            if gene_id.startswith('gene:Osativa'):
+                cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_nipponbare.gff"
+            match=os.popen(cmd).read()
+            list_mrna=re.split("\n",match)
+            list_mrna.remove(list_mrna[-1])
+            for mrna in list_mrna:  # one grep output line per mRNA
+                mrna_id=mrna.split("ID=")[1].split(";Name=")[0].split(";Parent=")[0]  # text after the first "ID=", cut at ";Name=" / ";Parent="
+                parent_id=mrna.split("Parent=")[1]  # text after the first "Parent="
+                old_mrna_parent="Parent=" +str(parent_id)
+                new_mrna_parent="Parent=" + str(main_id)
+                new_mrna=mrna.replace(old_mrna_parent,new_mrna_parent)  # re-parent the mRNA to the merged gene
+                if mrna_id.startswith('Os'):  # NOTE(review): same pattern as cmd above; cmd2 may be stale for an unknown prefix
+                    cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_RAPDB.gff"
+                if mrna_id.startswith('LOC_Os'):
+                    cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_MSU.gff"
+                if mrna_id.startswith('transcript:Osativa'):
+                    cmd2="grep -w  Parent=" + mrna_id + " oryza_sativa_nipponbare.gff"
+                exons_match=os.popen(cmd2).read()
+                output.write(new_mrna)
+                output.write("\n")
+                output.write(exons_match)
+
+
+
+
+
+
+
diff --git a/scripts/merge_genes.py b/scripts/merge_genes.py
new file mode 100644
index 000000000..896436463
--- /dev/null
+++ b/scripts/merge_genes.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+# See the NOTICE file distributed with this work for additional information
+# regarding copyright ownership.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Merge Genes script.
+
+This script aims to merge the genes from Nipponbare 3 annotation sources (RAPDB, MSU and Gramene).
+
+Below are some of the criteria to consider:
+1. (plus) and (minus) strand genes are considered separately 
+2. Two genes are considered as overlapping if the length of their overlap is >=50% of the length of the shorter one (the region merged so far or the incoming gene).
+
+"""
+import argparse
+import logging
+from pathlib import Path
+
+import pandas
+
+from ensembl.utils import StrPath
+from ensembl.utils.argparse import ArgumentParser
+from ensembl.utils.logging import init_logging_with_args
+
+
+def update_gene_info(
+    update_gene: dict,
+    id_list: list,
+    merged_chromosome: str,
+    merged_gene_start: int,
+    merged_gene_end: int,
+    merged_strand: str,
+) -> dict:
+    """Append one merged gene record to the column-oriented gene table.
+
+    Args:
+        update_gene: Column name -> list of values; one value is appended to each column.
+        id_list: IDs of the genes merged into this record, stored comma-joined in "attribute".
+        merged_chromosome: Chromosome of the merged gene.
+        merged_gene_start: Start coordinate of the merged gene (integer, as parsed by the caller).
+        merged_gene_end: End coordinate of the merged gene (integer, as parsed by the caller).
+        merged_strand: Strand of the merged gene.
+
+    Returns:
+        The same `update_gene` dictionary, which is modified in place.
+    """
+    # Every column receives exactly one value so that the lists stay aligned row by row.
+    update_gene["chr"].append(merged_chromosome)
+    update_gene["source"].append("panoryza")
+    update_gene["gene"].append("gene")
+    update_gene["start"].append(merged_gene_start)
+    update_gene["end"].append(merged_gene_end)
+    update_gene["score"].append(".")
+    update_gene["strand"].append(merged_strand)
+    update_gene["frame"].append(".")
+    merge_ids = ",".join(id_list)
+    update_gene["attribute"].append(merge_ids)
+    return update_gene
+
+
+def sort_genes(output_file: dict) -> pandas.DataFrame:
+    """Sort merged genes by chromosome number, then by start coordinate.
+
+    Args:
+        output_file: Column name -> list of values, as returned by `merge_genes()`.
+
+    Returns:
+        Sorted data frame; chromosomes with no digits in their name are placed last.
+    """
+    gene_matrix = pandas.DataFrame.from_dict(output_file)
+    # Raw-string regex; float (not int) so that digit-less names become NaN instead of raising
+    gene_matrix["chr_no"] = gene_matrix["chr"].str.extract(r"(\d+)", expand=False).astype(float)
+    gene_matrix.sort_values(by=["chr_no", "start"], inplace=True)
+    return gene_matrix.drop(columns="chr_no")
+
+
+def merge_genes(input_gff_file: StrPath) -> dict:
+    """Merge genes from the input GFF file.
+
+    Genes are clustered per chromosome and strand, in file order. A gene joins the cluster being
+    built when it lies entirely inside it, or when it extends past the cluster end and the
+    overlap is at least 50% of the shorter of the two (cluster or gene). Any other gene closes
+    the cluster and opens a new one. Touching coordinates (start == cluster end) do not merge.
+    Each merged gene records the IDs of the genes it was built from; an ID is the last column
+    of the line, truncated at ";Name=" or ";biotype=".
+
+    Args:
+        input_gff_file: Path to a tab-separated, 9-column file with one gene per line, sorted
+            by start coordinate within each chromosome/strand run. Lines starting with "#" and
+            blank lines are skipped.
+
+    Returns:
+        The merged genes in a dictionary (column name -> list of values, one per merged gene).
+
+    Raises:
+        ValueError: If a gene starts before the cluster being built, i.e. the file is unsorted.
+    """
+
+    list_id: list[str] = []
+    dict_merged_gene: dict[str, list] = {
+        "chr": [],
+        "source": [],
+        "gene": [],
+        "start": [],
+        "end": [],
+        "score": [],
+        "strand": [],
+        "frame": [],
+        "attribute": [],
+    }
+    ## State of the cluster being built; only meaningful while list_id is not empty
+    gene_start = 0
+    gene_end = 0
+    current_chromosome = ""
+    current_strand = ""
+
+    logging.info("The input file is %s", input_gff_file)
+
+    with open(input_gff_file) as gff_file:
+        ## Stream the file line by line instead of loading it all in memory
+        for raw_line in gff_file:
+            line = raw_line.rstrip("\n")
+            ## Skip comments and blank lines (a blank line cannot be split into 9 columns)
+            if not line.strip() or line.startswith("#"):
+                continue
+            chromo, _, _, start, end, _, strand, _, description = line.split("\t")
+            gene_id = description.split(";Name=")[0].split(";biotype=")[0]
+            start = int(start)
+            end = int(end)
+
+            ## Decide whether this gene opens a new cluster or joins the one being built
+            same_region = chromo == current_chromosome and strand == current_strand
+            if not list_id or not same_region:
+                ## First gene of the file, or a change of chromosome/strand: never merge across
+                new_cluster = True
+            elif start < gene_start:
+                ## Within a chromosome/strand run the genes must come sorted by start coordinate
+                raise ValueError("Error: File is not sorted properly")
+            elif start >= gene_end:
+                ## The gene starts at or after the end of the cluster: no overlap to consider
+                new_cluster = True
+            elif end <= gene_end:
+                ## The gene lies entirely inside the cluster: always merged
+                new_cluster = False
+            else:
+                ## The gene extends past the cluster end: merge only if the overlap is at least
+                ## 50% of the shorter of the two (cluster built so far, or this gene)
+                overlap_length = gene_end - start
+                min_length = min(end - start, gene_end - gene_start)
+                new_cluster = overlap_length * 2 < min_length
+
+            if new_cluster:
+                ## Store the finished cluster; list_id is empty only before the first gene, and
+                ## in that case there is nothing to store yet
+                if list_id:
+                    dict_merged_gene = update_gene_info(
+                        dict_merged_gene,
+                        list_id,
+                        current_chromosome,
+                        gene_start,
+                        gene_end,
+                        current_strand,
+                    )
+                current_chromosome = chromo
+                current_strand = strand
+                gene_start = start
+                gene_end = end
+                list_id = [gene_id]
+            else:
+                list_id.append(gene_id)
+                if end > gene_end:
+                    ## Extend the cluster to cover the newly merged gene
+                    gene_end = end
+                    logging.info("overlap\n")
+
+    ## Store the last cluster here rather than on "the last line", so that trailing comments
+    ## or blank lines in the file cannot cause it to be dropped
+    if list_id:
+        dict_merged_gene = update_gene_info(
+            dict_merged_gene, list_id, current_chromosome, gene_start, gene_end, current_strand
+        )
+    return dict_merged_gene
+
+
+def parse_args(arg_list: list[str] | None) -> argparse.Namespace:
+    """Parse the arguments from the command line.
+
+    Args:
+        arg_list: List of arguments to parse. If `None`, grab them from the command line.
+
+    Returns:
+        Parsed arguments: `input_gff_file`, `output_dir` and the logging options.
+
+    """
+    parser = ArgumentParser(description="Merge genes script")
+    parser.add_argument_src_path(
+        "--input_gff_file",
+        default="sorted_all_gene.gff",
+        help="Input file with gene info from RAPDB, MSU and Gramene",
+    )
+    parser.add_argument_dst_path(
+        "--output_dir", default=".", help="Output directory for the merged genes file"
+    )
+    parser.add_log_arguments()
+    return parser.parse_args(arg_list)
+
+
+def main(arg_list: list[str] | None = None) -> None:
+    """Run the gene-merging pipeline from the command line.
+
+    Args:
+        arg_list: Arguments forwarded to `parse_args()`; `None` means use the command line.
+    """
+    cli_args = parse_args(arg_list)
+    init_logging_with_args(cli_args)
+
+    merged_table = sort_genes(merge_genes(cli_args.input_gff_file))
+    output_path = cli_args.output_dir / "merged_genes.tsv"
+    merged_table.to_csv(output_path, sep="\t", index=False)
+
+
+if __name__ == "__main__":
+    main()

From 0588d8482826aa08ca18ae93fbf605742d9d32d8 Mon Sep 17 00:00:00 2001
From: shradhasaraf <shradha@ebi.ac.uk>
Date: Mon, 14 Jul 2025 17:40:51 +0100
Subject: [PATCH 2/2] Added README and other files

---
 scripts/README.md           |  18 +++
 scripts/add_info.py         |  15 +--
 scripts/merged_genes.gff    | 248 ++++++++++++++++++++++++++++++++++++
 scripts/merged_genes.tsv    |   5 +
 scripts/sorted_all_gene.gff |  10 ++
 5 files changed, 285 insertions(+), 11 deletions(-)
 create mode 100644 scripts/README.md
 create mode 100644 scripts/merged_genes.gff
 create mode 100644 scripts/merged_genes.tsv
 create mode 100644 scripts/sorted_all_gene.gff

diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 000000000..a8a498f53
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,18 @@
+The script merge_genes.py merges genes from 3 Nipponbare annotation sources (Gramene, MSU and RAPDB). It works on a single GFF file obtained by combining the GFF files of the 3 annotations, namely oryza_sativa_RAPDB.gff, oryza_sativa_MSU.gff and oryza_sativa_gramene.gff.
+
+Input: sorted_all_gene.gff, this file contains the genes from all three annotation sources, sorted by chromosome number and position.
+Output: merged_genes.tsv, contains merged genes in a tsv format.
+ 
+Script options can be explored with help option.
+
+`python merge_genes.py --help`
+
+For example:
+`python merge_genes.py --input_gff_file sorted_all_gene.gff --output_dir /homes/user/`
+
+After running merge_genes.py, the information of mRNA, CDS, exon etc. is added for each gene by script add_info.py.
+
+Input: merged_genes.tsv, oryza_sativa_RAPDB.gff, oryza_sativa_MSU.gff and oryza_sativa_gramene.gff
+Output: merged_genes.gff
+
+Usage: `python add_info.py`
diff --git a/scripts/add_info.py b/scripts/add_info.py
index 68ee4b053..16b71765f 100644
--- a/scripts/add_info.py
+++ b/scripts/add_info.py
@@ -1,10 +1,10 @@
 import os
 import re
 
-file=open("sorted_combined.gff3","r")    
+file=open("merged_genes.tsv","r")    
 lines=file.readlines()
 
-output=open("full_gff.gff3", "w")
+output=open("merged_genes.gff", "w")
 list_id=[]
 list_mrna=[]
 main_id=""
@@ -46,7 +46,7 @@
             if gene_id.startswith('LOC_Os'):
                 cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_MSU.gff"
             if gene_id.startswith('gene:Osativa'):
-                cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_nipponbare.gff"
+                cmd="grep -P '\smRNA\s.+Parent=" + gene_id + "' oryza_sativa_gramene.gff"
             match=os.popen(cmd).read()
             list_mrna=re.split("\n",match)
             list_mrna.remove(list_mrna[-1])
@@ -61,15 +61,8 @@
                 if mrna_id.startswith('LOC_Os'):
                     cmd2="grep -w Parent=" + mrna_id + " oryza_sativa_MSU.gff"
                 if mrna_id.startswith('transcript:Osativa'):
-                    cmd2="grep -w  Parent=" + mrna_id + " oryza_sativa_nipponbare.gff"
+                    cmd2="grep -w  Parent=" + mrna_id + " oryza_sativa_gramene.gff"
                 exons_match=os.popen(cmd2).read()
                 output.write(new_mrna)
                 output.write("\n")
                 output.write(exons_match)
-
-
-
-
-
-
-
diff --git a/scripts/merged_genes.gff b/scripts/merged_genes.gff
new file mode 100644
index 000000000..41d9e716e
--- /dev/null
+++ b/scripts/merged_genes.gff
@@ -0,0 +1,248 @@
+Chr1	panoryza	gene	2903	10817	.	+	.	ID=Os01g0100100;Name=Os01g0100100
+Chr1	MSU_osa1r7	mRNA	2903	10817	.	+	.	ID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=Os01g0100100
+Chr1	MSU_osa1r7	exon	2903	3268	.	+	.	ID=LOC_Os01g01010.1:exon_1;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	3354	3616	.	+	.	ID=LOC_Os01g01010.1:exon_2;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	4357	4455	.	+	.	ID=LOC_Os01g01010.1:exon_3;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	5457	5560	.	+	.	ID=LOC_Os01g01010.1:exon_4;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	7136	7944	.	+	.	ID=LOC_Os01g01010.1:exon_5;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	8028	8150	.	+	.	ID=LOC_Os01g01010.1:exon_6;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	8232	8320	.	+	.	ID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	8408	8608	.	+	.	ID=LOC_Os01g01010.1:exon_8;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	9210	9617	.	+	.	ID=LOC_Os01g01010.1:exon_9;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	10104	10187	.	+	.	ID=LOC_Os01g01010.1:exon_10;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	10274	10430	.	+	.	ID=LOC_Os01g01010.1:exon_11;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	exon	10504	10817	.	+	.	ID=LOC_Os01g01010.1:exon_12;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	five_prime_UTR	2903	3268	.	+	.	ID=LOC_Os01g01010.1:utr_1;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	five_prime_UTR	3354	3448	.	+	.	ID=LOC_Os01g01010.1:utr_2;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	3449	3616	.	+	.	ID=LOC_Os01g01010.1:cds_1;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	4357	4455	.	+	.	ID=LOC_Os01g01010.1:cds_2;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	5457	5560	.	+	.	ID=LOC_Os01g01010.1:cds_3;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	7136	7944	.	+	.	ID=LOC_Os01g01010.1:cds_4;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	8028	8150	.	+	.	ID=LOC_Os01g01010.1:cds_5;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	8232	8320	.	+	.	ID=LOC_Os01g01010.1:cds_6;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	8408	8608	.	+	.	ID=LOC_Os01g01010.1:cds_7;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	9210	9617	.	+	.	ID=LOC_Os01g01010.1:cds_8;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	10104	10187	.	+	.	ID=LOC_Os01g01010.1:cds_9;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	CDS	10274	10297	.	+	.	ID=LOC_Os01g01010.1:cds_10;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	three_prime_UTR	10298	10430	.	+	.	ID=LOC_Os01g01010.1:utr_3;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	three_prime_UTR	10504	10817	.	+	.	ID=LOC_Os01g01010.1:utr_4;Parent=LOC_Os01g01010.1
+Chr1	MSU_osa1r7	mRNA	2984	10562	.	+	.	ID=LOC_Os01g01010.2;Name=LOC_Os01g01010.2;Parent=Os01g0100100
+Chr1	MSU_osa1r7	exon	2984	3255	.	+	.	ID=LOC_Os01g01010.2:exon_1;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	3354	3616	.	+	.	ID=LOC_Os01g01010.2:exon_2;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	4357	4455	.	+	.	ID=LOC_Os01g01010.2:exon_3;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	5457	5560	.	+	.	ID=LOC_Os01g01010.2:exon_4;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	7136	7944	.	+	.	ID=LOC_Os01g01010.2:exon_5;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	8028	8150	.	+	.	ID=LOC_Os01g01010.2:exon_6;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	8232	8320	.	+	.	ID=LOC_Os01g01010.2:exon_7;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	8408	8608	.	+	.	ID=LOC_Os01g01010.2:exon_8;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	9210	9311	.	+	.	ID=LOC_Os01g01010.2:exon_9;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	10104	10187	.	+	.	ID=LOC_Os01g01010.2:exon_10;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	10232	10244	.	+	.	ID=LOC_Os01g01010.2:exon_11;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	exon	10504	10562	.	+	.	ID=LOC_Os01g01010.2:exon_12;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	five_prime_UTR	2984	3255	.	+	.	ID=LOC_Os01g01010.2:utr_1;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	five_prime_UTR	3354	3448	.	+	.	ID=LOC_Os01g01010.2:utr_2;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	3449	3616	.	+	.	ID=LOC_Os01g01010.2:cds_1;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	4357	4455	.	+	.	ID=LOC_Os01g01010.2:cds_2;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	5457	5560	.	+	.	ID=LOC_Os01g01010.2:cds_3;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	7136	7944	.	+	.	ID=LOC_Os01g01010.2:cds_4;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	8028	8150	.	+	.	ID=LOC_Os01g01010.2:cds_5;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	8232	8320	.	+	.	ID=LOC_Os01g01010.2:cds_6;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	8408	8608	.	+	.	ID=LOC_Os01g01010.2:cds_7;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	9210	9311	.	+	.	ID=LOC_Os01g01010.2:cds_8;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	10104	10187	.	+	.	ID=LOC_Os01g01010.2:cds_9;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	10232	10244	.	+	.	ID=LOC_Os01g01010.2:cds_10;Parent=LOC_Os01g01010.2
+Chr1	MSU_osa1r7	CDS	10504	10562	.	+	.	ID=LOC_Os01g01010.2:cds_11;Parent=LOC_Os01g01010.2
+1	NAM	mRNA	2976	9671	.	+	.	ID=transcript:Osativa.01G000010_01;Parent=Os01g0100100
+1	NAM	five_prime_UTR	2976	3268	.	+	.	Parent=transcript:Osativa.01G000010_01
+1	NAM	five_prime_UTR	3354	3448	.	+	.	Parent=transcript:Osativa.01G000010_01
+1	NAM	exon	2976	3268	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000010_01.exon.1;rank=1
+1	NAM	exon	3354	3616	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.2;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.2;rank=2
+1	NAM	exon	4357	4455	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.3;rank=3
+1	NAM	exon	5457	5560	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000010_01.exon.4;rank=4
+1	NAM	exon	7136	7944	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.5;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_01.exon.5;rank=5
+1	NAM	exon	8028	8150	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.6;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_01.exon.6;rank=6
+1	NAM	exon	8232	8320	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.7;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.7;rank=7
+1	NAM	exon	8408	8629	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.8;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.8;rank=8
+1	NAM	exon	9210	9671	.	+	.	Parent=transcript:Osativa.01G000010_01;Name=Osativa.01G000010_01.exon.9;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_01.exon.9;rank=9
+1	NAM	CDS	3449	3616	.	+	0	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	4357	4455	.	+	0	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	5457	5560	.	+	0	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	7136	7944	.	+	1	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	8028	8150	.	+	2	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	8232	8320	.	+	2	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	8408	8629	.	+	0	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	CDS	9210	9671	.	+	0	ID=CDS:Osativa.01G000010_01;Parent=transcript:Osativa.01G000010_01;protein_id=Osativa.01G000010_01
+1	NAM	mRNA	2976	9671	.	+	.	ID=transcript:Osativa.01G000010_02;Parent=Os01g0100100
+1	NAM	five_prime_UTR	2976	3268	.	+	.	Parent=transcript:Osativa.01G000010_02
+1	NAM	five_prime_UTR	3354	3448	.	+	.	Parent=transcript:Osativa.01G000010_02
+1	NAM	exon	2976	3268	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000010_02.exon.1;rank=1
+1	NAM	exon	3354	3616	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.2;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.2;rank=2
+1	NAM	exon	4357	4455	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.3;rank=3
+1	NAM	exon	5457	5560	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000010_02.exon.4;rank=4
+1	NAM	exon	7136	7944	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.5;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_02.exon.5;rank=5
+1	NAM	exon	8028	8150	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.6;ensembl_end_phase=1;ensembl_phase=1;exon_id=Osativa.01G000010_02.exon.6;rank=6
+1	NAM	exon	8232	8320	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.7;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.7;rank=7
+1	NAM	exon	8408	8608	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.8;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.8;rank=8
+1	NAM	exon	9210	9671	.	+	.	Parent=transcript:Osativa.01G000010_02;Name=Osativa.01G000010_02.exon.9;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000010_02.exon.9;rank=9
+1	NAM	CDS	3449	3616	.	+	0	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	4357	4455	.	+	0	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	5457	5560	.	+	0	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	7136	7944	.	+	1	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	8028	8150	.	+	2	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	8232	8320	.	+	2	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	8408	8608	.	+	0	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+1	NAM	CDS	9210	9671	.	+	0	ID=CDS:Osativa.01G000010_02;Parent=transcript:Osativa.01G000010_02;protein_id=Osativa.01G000010_02
+chr01	irgsp1_rep	mRNA	2983	10815	.	+	.	ID=Os01t0100100-01;Name=Os01t0100100-01;Parent=Os01g0100100
+chr01	irgsp1_rep	exon	2983	3268	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	3354	3616	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	4357	4455	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	5457	5560	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	7136	7944	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	8028	8150	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	8232	8320	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	8408	8608	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	9210	9615	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	10102	10187	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	10274	10430	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	exon	10504	10815	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	five_prime_UTR	2983	3268	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	five_prime_UTR	3354	3448	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	3449	3616	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	4357	4455	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	5457	5560	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	7136	7944	.	+	1	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	8028	8150	.	+	2	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	8232	8320	.	+	2	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	8408	8608	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	9210	9615	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	10102	10187	.	+	2	Parent=Os01t0100100-01
+chr01	irgsp1_rep	CDS	10274	10297	.	+	0	Parent=Os01t0100100-01
+chr01	irgsp1_rep	three_prime_UTR	10298	10430	.	+	.	Parent=Os01t0100100-01
+chr01	irgsp1_rep	three_prime_UTR	10504	10815	.	+	.	Parent=Os01t0100100-01
+Chr1	panoryza	gene	11218	12435	.	+	.	ID=Os01g0100200;Name=Os01g0100200
+chr01	irgsp1_rep	mRNA	11218	12435	.	+	.	ID=Os01t0100200-01;Name=Os01t0100200-01;Parent=Os01g0100200
+chr01	irgsp1_rep	exon	11218	12060	.	+	.	Parent=Os01t0100200-01
+chr01	irgsp1_rep	exon	12152	12435	.	+	.	Parent=Os01t0100200-01
+chr01	irgsp1_rep	five_prime_UTR	11218	11797	.	+	.	Parent=Os01t0100200-01
+chr01	irgsp1_rep	CDS	11798	12060	.	+	0	Parent=Os01t0100200-01
+chr01	irgsp1_rep	CDS	12152	12317	.	+	1	Parent=Os01t0100200-01
+chr01	irgsp1_rep	three_prime_UTR	12318	12435	.	+	.	Parent=Os01t0100200-01
+Chr1	MSU_osa1r7	mRNA	11218	12435	.	+	.	ID=LOC_Os01g01019.1;Name=LOC_Os01g01019.1;Parent=Os01g0100200
+Chr1	MSU_osa1r7	exon	11218	12060	.	+	.	ID=LOC_Os01g01019.1:exon_1;Parent=LOC_Os01g01019.1
+Chr1	MSU_osa1r7	exon	12152	12435	.	+	.	ID=LOC_Os01g01019.1:exon_2;Parent=LOC_Os01g01019.1
+Chr1	MSU_osa1r7	five_prime_UTR	11218	11797	.	+	.	ID=LOC_Os01g01019.1:utr_1;Parent=LOC_Os01g01019.1
+Chr1	MSU_osa1r7	CDS	11798	12060	.	+	.	ID=LOC_Os01g01019.1:cds_1;Parent=LOC_Os01g01019.1
+Chr1	MSU_osa1r7	CDS	12152	12317	.	+	.	ID=LOC_Os01g01019.1:cds_2;Parent=LOC_Os01g01019.1
+Chr1	MSU_osa1r7	three_prime_UTR	12318	12435	.	+	.	ID=LOC_Os01g01019.1:utr_2;Parent=LOC_Os01g01019.1
+1	NAM	mRNA	11371	12435	.	+	.	ID=transcript:Osativa.01G000020_01;Parent=Os01g0100200
+1	NAM	five_prime_UTR	11371	11797	.	+	.	Parent=transcript:Osativa.01G000020_01
+1	NAM	exon	11371	12060	.	+	.	Parent=transcript:Osativa.01G000020_01;Name=Osativa.01G000020_01.exon.1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000020_01.exon.1;rank=1
+1	NAM	exon	12152	12435	.	+	.	Parent=transcript:Osativa.01G000020_01;Name=Osativa.01G000020_01.exon.2;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000020_01.exon.2;rank=2
+1	NAM	CDS	11798	12060	.	+	0	ID=CDS:Osativa.01G000020_01;Parent=transcript:Osativa.01G000020_01;protein_id=Osativa.01G000020_01
+1	NAM	CDS	12152	12317	.	+	1	ID=CDS:Osativa.01G000020_01;Parent=transcript:Osativa.01G000020_01;protein_id=Osativa.01G000020_01
+1	NAM	three_prime_UTR	12318	12435	.	+	.	Parent=transcript:Osativa.01G000020_01
+Chr1	panoryza	gene	12648	15915	.	+	.	ID=Os01g0100400;Name=Os01g0100400
+Chr1	MSU_osa1r7	mRNA	12648	15915	.	+	.	ID=LOC_Os01g01030.1;Name=LOC_Os01g01030.1;Parent=Os01g0100400
+Chr1	MSU_osa1r7	exon	12648	13813	.	+	.	ID=LOC_Os01g01030.1:exon_1;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	exon	13906	14271	.	+	.	ID=LOC_Os01g01030.1:exon_2;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	exon	14359	14437	.	+	.	ID=LOC_Os01g01030.1:exon_3;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	exon	14969	15171	.	+	.	ID=LOC_Os01g01030.1:exon_4;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	exon	15266	15915	.	+	.	ID=LOC_Os01g01030.1:exon_5;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	five_prime_UTR	12648	12773	.	+	.	ID=LOC_Os01g01030.1:utr_1;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	CDS	12774	13813	.	+	.	ID=LOC_Os01g01030.1:cds_1;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	CDS	13906	14271	.	+	.	ID=LOC_Os01g01030.1:cds_2;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	CDS	14359	14437	.	+	.	ID=LOC_Os01g01030.1:cds_3;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	CDS	14969	15171	.	+	.	ID=LOC_Os01g01030.1:cds_4;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	CDS	15266	15359	.	+	.	ID=LOC_Os01g01030.1:cds_5;Parent=LOC_Os01g01030.1
+Chr1	MSU_osa1r7	three_prime_UTR	15360	15915	.	+	.	ID=LOC_Os01g01030.1:utr_2;Parent=LOC_Os01g01030.1
+chr01	irgsp1_rep	mRNA	12721	15685	.	+	.	ID=Os01t0100400-01;Name=Os01t0100400-01;Parent=Os01g0100400
+chr01	irgsp1_rep	exon	12721	13813	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	exon	13906	14271	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	exon	14359	14437	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	exon	14969	15171	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	exon	15266	15685	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	five_prime_UTR	12721	12773	.	+	.	Parent=Os01t0100400-01
+chr01	irgsp1_rep	CDS	12774	13813	.	+	0	Parent=Os01t0100400-01
+chr01	irgsp1_rep	CDS	13906	14271	.	+	1	Parent=Os01t0100400-01
+chr01	irgsp1_rep	CDS	14359	14437	.	+	1	Parent=Os01t0100400-01
+chr01	irgsp1_rep	CDS	14969	15171	.	+	0	Parent=Os01t0100400-01
+chr01	irgsp1_rep	CDS	15266	15359	.	+	1	Parent=Os01t0100400-01
+chr01	irgsp1_rep	three_prime_UTR	15360	15685	.	+	.	Parent=Os01t0100400-01
+1	NAM	mRNA	12721	15915	.	+	.	ID=transcript:Osativa.01G000040_01;Parent=Os01g0100400
+1	NAM	five_prime_UTR	12721	12773	.	+	.	Parent=transcript:Osativa.01G000040_01
+1	NAM	exon	12721	13813	.	+	.	Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.1;rank=1
+1	NAM	exon	13906	14271	.	+	.	Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.2;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.2;rank=2
+1	NAM	exon	14359	14437	.	+	.	Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.3;ensembl_end_phase=0;ensembl_phase=0;exon_id=Osativa.01G000040_01.exon.3;rank=3
+1	NAM	exon	14969	15171	.	+	.	Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.4;ensembl_end_phase=2;ensembl_phase=2;exon_id=Osativa.01G000040_01.exon.4;rank=4
+1	NAM	exon	15266	15915	.	+	.	Parent=transcript:Osativa.01G000040_01;Name=Osativa.01G000040_01.exon.5;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Osativa.01G000040_01.exon.5;rank=5
+1	NAM	CDS	12774	13813	.	+	0	ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01
+1	NAM	CDS	13906	14271	.	+	1	ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01
+1	NAM	CDS	14359	14437	.	+	1	ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01
+1	NAM	CDS	14969	15171	.	+	0	ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01
+1	NAM	CDS	15266	15359	.	+	1	ID=CDS:Osativa.01G000040_01;Parent=transcript:Osativa.01G000040_01;protein_id=Osativa.01G000040_01
+1	NAM	three_prime_UTR	15360	15915	.	+	.	Parent=transcript:Osativa.01G000040_01
+Chr1	panoryza	gene	16292	20323	.	+	.	ID=LOC_Os01g01040;Name=LOC_Os01g01040
+Chr1	MSU_osa1r7	mRNA	16292	20323	.	+	.	ID=LOC_Os01g01040.1;Name=LOC_Os01g01040.1;Parent=LOC_Os01g01040
+Chr1	MSU_osa1r7	exon	16292	16976	.	+	.	ID=LOC_Os01g01040.1:exon_1;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	17383	17474	.	+	.	ID=LOC_Os01g01040.1:exon_2;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	17558	18258	.	+	.	ID=LOC_Os01g01040.1:exon_3;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	18501	18571	.	+	.	ID=LOC_Os01g01040.1:exon_4;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	18968	19057	.	+	.	ID=LOC_Os01g01040.1:exon_5;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	19142	19321	.	+	.	ID=LOC_Os01g01040.1:exon_6;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	19531	19629	.	+	.	ID=LOC_Os01g01040.1:exon_7;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	exon	19734	20323	.	+	.	ID=LOC_Os01g01040.1:exon_8;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	five_prime_UTR	16292	16598	.	+	.	ID=LOC_Os01g01040.1:utr_1;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	16599	16976	.	+	.	ID=LOC_Os01g01040.1:cds_1;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	17383	17474	.	+	.	ID=LOC_Os01g01040.1:cds_2;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	17558	18258	.	+	.	ID=LOC_Os01g01040.1:cds_3;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	18501	18571	.	+	.	ID=LOC_Os01g01040.1:cds_4;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	18968	19057	.	+	.	ID=LOC_Os01g01040.1:cds_5;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	19142	19321	.	+	.	ID=LOC_Os01g01040.1:cds_6;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	CDS	19531	19593	.	+	.	ID=LOC_Os01g01040.1:cds_7;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	three_prime_UTR	19594	19629	.	+	.	ID=LOC_Os01g01040.1:utr_2;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	three_prime_UTR	19734	20323	.	+	.	ID=LOC_Os01g01040.1:utr_3;Parent=LOC_Os01g01040.1
+Chr1	MSU_osa1r7	mRNA	16321	20323	.	+	.	ID=LOC_Os01g01040.2;Name=LOC_Os01g01040.2;Parent=LOC_Os01g01040
+Chr1	MSU_osa1r7	exon	16321	16976	.	+	.	ID=LOC_Os01g01040.2:exon_1;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	17383	17474	.	+	.	ID=LOC_Os01g01040.2:exon_2;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	17558	18258	.	+	.	ID=LOC_Os01g01040.2:exon_3;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	18501	18571	.	+	.	ID=LOC_Os01g01040.2:exon_4;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	18968	19057	.	+	.	ID=LOC_Os01g01040.2:exon_5;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	19142	19321	.	+	.	ID=LOC_Os01g01040.2:exon_6;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	19531	19629	.	+	.	ID=LOC_Os01g01040.2:exon_7;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	exon	19731	20323	.	+	.	ID=LOC_Os01g01040.2:exon_8;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	five_prime_UTR	16321	16598	.	+	.	ID=LOC_Os01g01040.2:utr_1;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	16599	16976	.	+	.	ID=LOC_Os01g01040.2:cds_1;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	17383	17474	.	+	.	ID=LOC_Os01g01040.2:cds_2;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	17558	18258	.	+	.	ID=LOC_Os01g01040.2:cds_3;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	18501	18571	.	+	.	ID=LOC_Os01g01040.2:cds_4;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	18968	19057	.	+	.	ID=LOC_Os01g01040.2:cds_5;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	19142	19321	.	+	.	ID=LOC_Os01g01040.2:cds_6;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	CDS	19531	19593	.	+	.	ID=LOC_Os01g01040.2:cds_7;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	three_prime_UTR	19594	19629	.	+	.	ID=LOC_Os01g01040.2:utr_2;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	three_prime_UTR	19731	20323	.	+	.	ID=LOC_Os01g01040.2:utr_3;Parent=LOC_Os01g01040.2
+Chr1	MSU_osa1r7	mRNA	16321	20323	.	+	.	ID=LOC_Os01g01040.3;Name=LOC_Os01g01040.3;Parent=LOC_Os01g01040
+Chr1	MSU_osa1r7	exon	16321	16976	.	+	.	ID=LOC_Os01g01040.3:exon_1;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	17383	17474	.	+	.	ID=LOC_Os01g01040.3:exon_2;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	17558	18258	.	+	.	ID=LOC_Os01g01040.3:exon_3;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	18501	18571	.	+	.	ID=LOC_Os01g01040.3:exon_4;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	18968	19057	.	+	.	ID=LOC_Os01g01040.3:exon_5;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	19142	19321	.	+	.	ID=LOC_Os01g01040.3:exon_6;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	exon	19531	20323	.	+	.	ID=LOC_Os01g01040.3:exon_7;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	five_prime_UTR	16321	16598	.	+	.	ID=LOC_Os01g01040.3:utr_1;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	16599	16976	.	+	.	ID=LOC_Os01g01040.3:cds_1;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	17383	17474	.	+	.	ID=LOC_Os01g01040.3:cds_2;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	17558	18258	.	+	.	ID=LOC_Os01g01040.3:cds_3;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	18501	18571	.	+	.	ID=LOC_Os01g01040.3:cds_4;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	18968	19057	.	+	.	ID=LOC_Os01g01040.3:cds_5;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	19142	19321	.	+	.	ID=LOC_Os01g01040.3:cds_6;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	CDS	19531	19593	.	+	.	ID=LOC_Os01g01040.3:cds_7;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	three_prime_UTR	19594	20323	.	+	.	ID=LOC_Os01g01040.3:utr_2;Parent=LOC_Os01g01040.3
+Chr1	MSU_osa1r7	mRNA	16292	18304	.	+	.	ID=LOC_Os01g01040.4;Name=LOC_Os01g01040.4;Parent=LOC_Os01g01040
+Chr1	MSU_osa1r7	exon	16292	16976	.	+	.	ID=LOC_Os01g01040.4:exon_1;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	exon	17383	17474	.	+	.	ID=LOC_Os01g01040.4:exon_2;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	exon	17558	18304	.	+	.	ID=LOC_Os01g01040.4:exon_3;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	five_prime_UTR	16292	16598	.	+	.	ID=LOC_Os01g01040.4:utr_1;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	CDS	16599	16976	.	+	.	ID=LOC_Os01g01040.4:cds_1;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	CDS	17383	17474	.	+	.	ID=LOC_Os01g01040.4:cds_2;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	CDS	17558	18272	.	+	.	ID=LOC_Os01g01040.4:cds_3;Parent=LOC_Os01g01040.4
+Chr1	MSU_osa1r7	three_prime_UTR	18273	18304	.	+	.	ID=LOC_Os01g01040.4:utr_2;Parent=LOC_Os01g01040.4
diff --git a/scripts/merged_genes.tsv b/scripts/merged_genes.tsv
new file mode 100644
index 000000000..696f97e40
--- /dev/null
+++ b/scripts/merged_genes.tsv
@@ -0,0 +1,5 @@
+chr	source	gene	start	end	score	strand	frame	attribute
+Chr1	panoryza	gene	2903	10817	.	+	.	ID=LOC_Os01g01010,ID=gene:Osativa.01G000010,ID=Os01g0100100
+Chr1	panoryza	gene	11218	12435	.	+	.	ID=Os01g0100200,ID=LOC_Os01g01019,ID=gene:Osativa.01G000020
+Chr1	panoryza	gene	12648	15915	.	+	.	ID=LOC_Os01g01030,ID=Os01g0100400,ID=gene:Osativa.01G000040
+Chr1	panoryza	gene	16292	20323	.	+	.	ID=LOC_Os01g01040
diff --git a/scripts/sorted_all_gene.gff b/scripts/sorted_all_gene.gff
new file mode 100644
index 000000000..30fee71c1
--- /dev/null
+++ b/scripts/sorted_all_gene.gff
@@ -0,0 +1,10 @@
+Chr1	MSU_osa1r7	gene	2903	10817	.	+	.	ID=LOC_Os01g01010;Name=LOC_Os01g01010;Note=TBC%20domain%20containing%20protein%2C%20expressed
+Chr1	NAM	gene	2976	9671	.	+	.	ID=gene:Osativa.01G000010;biotype=protein_coding;logic_name=oryza_cshl
+Chr1	irgsp1_locus	gene	2983	10815	.	+	.	ID=Os01g0100100;Name=Os01g0100100;Note=RabGAP/TBC domain containing protein. (Os01t0100100-01);Transcript variants=Os01t0100100-01
+Chr1	irgsp1_locus	gene	11218	12435	.	+	.	ID=Os01g0100200;Name=Os01g0100200;Note=Conserved hypothetical protein. (Os01t0100200-01);Transcript variants=Os01t0100200-01
+Chr1	MSU_osa1r7	gene	11218	12435	.	+	.	ID=LOC_Os01g01019;Name=LOC_Os01g01019;Note=expressed%20protein
+Chr1	NAM	gene	11371	12435	.	+	.	ID=gene:Osativa.01G000020;biotype=protein_coding;logic_name=oryza_cshl
+Chr1	MSU_osa1r7	gene	12648	15915	.	+	.	ID=LOC_Os01g01030;Name=LOC_Os01g01030;Note=monocopper%20oxidase%2C%20putative%2C%20expressed
+Chr1	irgsp1_locus	gene	12721	15685	.	+	.	ID=Os01g0100400;Name=Os01g0100400;Note=Similar to Pectinesterase-like protein. (Os01t0100400-01);Transcript variants=Os01t0100400-01
+Chr1	NAM	gene	12721	15915	.	+	.	ID=gene:Osativa.01G000040;biotype=protein_coding;logic_name=oryza_cshl
+Chr1	MSU_osa1r7	gene	16292	20323	.	+	.	ID=LOC_Os01g01040;Name=LOC_Os01g01040;Note=expressed%20protein