From 66c90b17989958a69ce459ec7b81c0a755ce571d Mon Sep 17 00:00:00 2001 From: Dakota Blair Date: Tue, 8 Apr 2025 14:07:28 -0400 Subject: [PATCH 1/3] Updated report to include assembly ref. --- kbase.yml | 4 ++-- lib/kb_ObjectInfo/CreateAssemblyReport.py | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kbase.yml b/kbase.yml index 4fda497..684f73d 100644 --- a/kbase.yml +++ b/kbase.yml @@ -8,8 +8,8 @@ service-language: python module-version: - 1.2.2 + 1.2.3 owners: - [landml] + [dakota, landml] diff --git a/lib/kb_ObjectInfo/CreateAssemblyReport.py b/lib/kb_ObjectInfo/CreateAssemblyReport.py index ff49722..0e1346f 100644 --- a/lib/kb_ObjectInfo/CreateAssemblyReport.py +++ b/lib/kb_ObjectInfo/CreateAssemblyReport.py @@ -34,17 +34,18 @@ def assembly_metadata(self,obj_list): this_list = [['Assembly Name','Assembly ID', 'DNA size', 'GC content', 'Number contigs', 'FastA handle reference', 'MD5', 'Type', 'Taxon reference','Original filename']] - list = ['assembly_id', 'dna_size', 'gc_content', 'num_contigs', + items = ['assembly_id', 'dna_size', 'gc_content', 'num_contigs', 'fasta_handle_ref', 'md5', 'type', 'taxon_ref'] + # DataFileUtil guarantees that 'info' contains object_info: + # https://github.com/kbaseapps/DataFileUtil/blob/master/DataFileUtil.spec#L499-L506 for assembly in obj_list['data']: - name = "Assembly Data Object" - if 'info' in assembly: - name = assembly['info'][1] + name = assembly['info'][1] + ref = f"{assembly['info'][6]}/{assembly['info'][0]}/{assembly['info'][4]}" # Create the row for the one assembly - assem_list = [name] - for item in list: + assem_list = [name, ref] + for item in items: if item in assembly['data']: assem_list.append(str(assembly['data'][item])) else: From 8b8367a304fc335fcb36b1d0ac17b782a0f10450 Mon Sep 17 00:00:00 2001 From: Dakota Blair Date: Thu, 10 Apr 2025 14:34:51 -0400 Subject: [PATCH 2/3] Fixed whitespace, enabled assembly_set test. --- lib/kb_ObjectInfo/CreateAssemblyReport.py | 50 +++++++++++------------ test/kb_ObjectInfo_test.py | 18 ++++---- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/lib/kb_ObjectInfo/CreateAssemblyReport.py b/lib/kb_ObjectInfo/CreateAssemblyReport.py index 0e1346f..d64da2f 100644 --- a/lib/kb_ObjectInfo/CreateAssemblyReport.py +++ b/lib/kb_ObjectInfo/CreateAssemblyReport.py @@ -10,11 +10,11 @@ def __init__(self, config): self.callback_url = os.environ['SDK_CALLBACK_URL'] self.dfu = DataFileUtil(self.callback_url) - + def assembly_overview(self,obj_list): header = "OVERVIEW" this_list = [["Assembly Name","Type","Assembly Type"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" type_assem = '' @@ -26,22 +26,22 @@ def assembly_overview(self,obj_list): type_assem = assembly['data']['type'] this_list.extend([[name,object_type,type_assem]]) - + return (header,this_list) - + def assembly_metadata(self,obj_list): header = "METADATA" this_list = [['Assembly Name','Assembly ID', 'DNA size', 'GC content', 'Number contigs', 'FastA handle reference', 'MD5', 'Type', 'Taxon reference','Original filename']] - + items = ['assembly_id', 'dna_size', 'gc_content', 'num_contigs', 'fasta_handle_ref', 'md5', 'type', 'taxon_ref'] - + # DataFileUtil guarantees that 'info' contains object_info: # https://github.com/kbaseapps/DataFileUtil/blob/master/DataFileUtil.spec#L499-L506 for assembly in obj_list['data']: name = assembly['info'][1] - ref = f"{assembly['info'][6]}/{assembly['info'][0]}/{assembly['info'][4]}" + ref = f"{assembly['info'][6]}/{assembly['info'][0]}/{assembly['info'][4]}" # Create the row for the one assembly assem_list = [name, ref] @@ -55,18 +55,18 @@ def assembly_metadata(self,obj_list): assem_list.append(assembly['data']['fasta_handle_info']['node_file_name']) else: assem_list.append(" ") - + # Add the row to the list that will be returned this_list.extend([assem_list]) - + return (header,this_list) - + def assembly_dnabases(self,obj_list): header = "DNA Composition" this_list = [["Assembly Name","Total DNA Bases", "A Count","A Percent","C Count","C Percent", "G Count","G Percent","T Count","T Percent"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" if 'info' in assembly: @@ -75,32 +75,32 @@ def assembly_dnabases(self,obj_list): pct = 1.00 if 'dna_size' in assembly['data']: dna_size = assembly['data']['dna_size'] - + assem_list = [name,str(dna_size)] for base in ["A","C","G","T"]: pct = round(100 * assembly['data']['base_counts'][base] / dna_size,2) assem_list.append(str(assembly['data']['base_counts'][base])) assem_list.append(str(pct)) - + this_list.extend([assem_list]) - + return(header,this_list) - + def assembly_contigs(self,obj_list): header = "Contigs in the Assembly" this_list= [["Assembly Name","Contig Name","Length","GC content","Number of Ns","Contig ID","Description"]] - + for assembly in obj_list['data']: name = "Assembly Data Object" if 'info' in assembly: name = assembly['info'][1] - + if 'contigs' in assembly['data']: myContig = assembly['data']['contigs'] for ctg in myContig: list = ['length', 'gc_content', 'Ncount', 'contig_id', 'description'] ctg_list = [name,ctg] - + for item in list: if item in myContig[ctg]: ctg_list.append(format(myContig[ctg][item])) @@ -108,16 +108,16 @@ def assembly_contigs(self,obj_list): ctg_list.append("") this_list.append(ctg_list) - + return(header,this_list) - - + + def assembly_dna(self,obj_list,scratch): header = "Contig FastA files found in the download files." fasta_list = [] dna_string = "" cf = CreateFasta(self.config) - + for assembly in obj_list['data']: name = "Assembly Data Object" input_ref = '' @@ -127,7 +127,7 @@ def assembly_dna(self,obj_list,scratch): fasta_list = cf.get_assembly_sequence(input_ref) report_path = os.path.join(scratch, name + '.fna') - + # Write the DNA string out to a Fasta file report_txt = open(report_path, "w") for dna_seq in fasta_list: @@ -135,6 +135,6 @@ def assembly_dna(self,obj_list,scratch): report_txt.write(dna) dna_string += dna report_txt.close() - + return(header) - + diff --git a/test/kb_ObjectInfo_test.py b/test/kb_ObjectInfo_test.py index cf23b96..0b2def0 100644 --- a/test/kb_ObjectInfo_test.py +++ b/test/kb_ObjectInfo_test.py @@ -82,7 +82,7 @@ def setUpClass(cls): "generate_ids_if_needed": 1, "generate_missing_genes": 1 })['genome_ref'] - + # Prepare the Genome from gbff File cls.genbank_file_name = 'Carsonella_ruddii_HT_isolate_Thao2000.gbff' # Set the path to file in scratch @@ -251,9 +251,9 @@ def mytest_assembly_metadata(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - - def mytest_assembly_set(self): - assemblyset_ref = '69870/28/2' + + def test_assembly_set(self): + assemblyset_ref = '72131/16/1' ret = self.getImpl().assemblyset_report(self.getContext(), {'workspace_name': self.ws_info[1], 'input_ref': assemblyset_ref, @@ -264,7 +264,7 @@ def mytest_assembly_set(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_genome_protein_list(self): genome_ref = self.genome_ref genome_ref = '40843/4/1' @@ -281,7 +281,7 @@ def mytest_genome_protein_list(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_genome_protein_fasta(self): ret = self.getImpl().genome_report(self.getContext(), {'workspace_name': self.ws_info[1], @@ -394,7 +394,7 @@ def mytest_featureSet_unordered(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_sequenceSet(self): featset_ref = '27092/23/1' ret = self.getImpl().featseq_report(self.getContext(), @@ -405,7 +405,7 @@ def mytest_sequenceSet(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_ProtComp(self): protcomp_ref = '29939/15/1' ret = self.getImpl().protcomp_report(self.getContext(), @@ -428,7 +428,7 @@ def mytest_genomeComp(self): self.assertIn('report_name', ret[0]) self.assertIn('report_ref', ret[0]) pass - + def mytest_MSA(self): msa_ref = '70362/27/1' ret = self.getImpl().msa_report(self.getContext(), From bf98d13575e93b6cfb9f7e1ba22d63d60d970615 Mon Sep 17 00:00:00 2001 From: Dakota Blair Date: Thu, 10 Apr 2025 16:53:12 -0400 Subject: [PATCH 3/3] Fix columns. --- lib/kb_ObjectInfo/CreateAssemblyReport.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/kb_ObjectInfo/CreateAssemblyReport.py b/lib/kb_ObjectInfo/CreateAssemblyReport.py index d64da2f..d3eaee6 100644 --- a/lib/kb_ObjectInfo/CreateAssemblyReport.py +++ b/lib/kb_ObjectInfo/CreateAssemblyReport.py @@ -34,7 +34,7 @@ def assembly_metadata(self,obj_list): this_list = [['Assembly Name','Assembly ID', 'DNA size', 'GC content', 'Number contigs', 'FastA handle reference', 'MD5', 'Type', 'Taxon reference','Original filename']] - items = ['assembly_id', 'dna_size', 'gc_content', 'num_contigs', + items = ['dna_size', 'gc_content', 'num_contigs', 'fasta_handle_ref', 'md5', 'type', 'taxon_ref'] # DataFileUtil guarantees that 'info' contains object_info: @@ -137,4 +137,3 @@ def assembly_dna(self,obj_list,scratch): report_txt.close() return(header) -