diff --git a/kb_SPAdes.spec b/kb_SPAdes.spec index 53399b4..433fbc9 100644 --- a/kb_SPAdes.spec +++ b/kb_SPAdes.spec @@ -15,7 +15,7 @@ module kb_SPAdes { /* A boolean. 0 = false, anything else = true. */ typedef int bool; - + /* The workspace object name of a PairedEndLibrary file, whether of the KBaseAssembly or KBaseFile type. */ @@ -96,7 +96,7 @@ module kb_SPAdes { (all values must be odd, less than 128 and listed in ascending order) In the absence of these values, K values are automatically selected. min_contig_length - integer to filter out contigs with length < min_contig_length - from the HybridSPAdes output. Default value is 0 implying no filter. + from the HybridSPAdes output. Default value is 0 implying no filter. @optional dna_source @optional pipeline_options @optional kmer_sizes @@ -126,7 +126,7 @@ module kb_SPAdes { string report_name; string report_ref; } SPAdesOutput; - + /* Run SPAdes on paired end libraries */ funcdef run_SPAdes(SPAdesParams params) returns(SPAdesOutput output) authentication required; @@ -139,16 +139,6 @@ module kb_SPAdes { funcdef run_metaSPAdes(SPAdesParams params) returns(SPAdesOutput output) authentication required; - /* - params - the params used to run metaSPAdes. - use_defaults - (optional, def 0) if 1, just return the default requirements - use_heuristic - (optional, def 1) if 1, only use a heuristic based on the reads metadata to perform estimates - */ - typedef structure { - SPAdesParams params; - int use_defaults; - } MetaSPAdesEstimatorParams; - /* cpus - the number of CPUs required for the run memory - the minimal amount of memory in MB required for the run @@ -159,8 +149,8 @@ module kb_SPAdes { int memory; int walltime; } MetaSPAdesEstimate; - - funcdef estimate_metaSPAdes_requirements(MetaSPAdesEstimatorParams params) returns + + funcdef estimate_metaSPAdes_requirements(SPAdesParams params) returns (MetaSPAdesEstimate results) authentication required; }; diff --git a/lib/kb_SPAdes/kb_SPAdesClient.pm b/lib/kb_SPAdes/kb_SPAdesClient.pm index 094c44e..c829a94 100644 --- a/lib/kb_SPAdes/kb_SPAdesClient.pm +++ b/lib/kb_SPAdes/kb_SPAdesClient.pm @@ -469,11 +469,8 @@ Run SPAdes on paired end libraries for metagenomes =begin html
-$params is a kb_SPAdes.MetaSPAdesEstimatorParams
+$params is a kb_SPAdes.SPAdesParams
 $results is a kb_SPAdes.MetaSPAdesEstimate
-MetaSPAdesEstimatorParams is a reference to a hash where the following keys are defined:
-	params has a value which is a kb_SPAdes.SPAdesParams
-	use_defaults has a value which is an int
 SPAdesParams is a reference to a hash where the following keys are defined:
 	workspace_name has a value which is a string
 	output_contigset_name has a value which is a string
@@ -495,11 +492,8 @@ MetaSPAdesEstimate is a reference to a hash where the following keys are defined
 
 =begin text
 
-$params is a kb_SPAdes.MetaSPAdesEstimatorParams
+$params is a kb_SPAdes.SPAdesParams
 $results is a kb_SPAdes.MetaSPAdesEstimate
-MetaSPAdesEstimatorParams is a reference to a hash where the following keys are defined:
-	params has a value which is a kb_SPAdes.SPAdesParams
-	use_defaults has a value which is an int
 SPAdesParams is a reference to a hash where the following keys are defined:
 	workspace_name has a value which is a string
 	output_contigset_name has a value which is a string
@@ -924,7 +918,7 @@ kmer_sizes - (optional) K-mer sizes, Default values: 21, 33, 55, 77, 99, 127
                  (all values must be odd, less than 128 and listed in ascending order)
                  In the absence of these values, K values are automatically selected.
 min_contig_length - integer to filter out contigs with length < min_contig_length
-                 from the HybridSPAdes output. Default value is 0 implying no filter.    
+                 from the HybridSPAdes output. Default value is 0 implying no filter.
 @optional dna_source
 @optional pipeline_options
 @optional kmer_sizes
@@ -1011,45 +1005,6 @@ report_ref has a value which is a string
 
 
 
-=head2 MetaSPAdesEstimatorParams
-
-=over 4
-
-
-
-=item Description
-
-params - the params used to run metaSPAdes.
-use_defaults - (optional, def 0) if 1, just return the default requirements
-use_heuristic - (optional, def 1) if 1, only use a heuristic based on the reads metadata to perform estimates
-
-
-=item Definition
-
-=begin html
-
-
-a reference to a hash where the following keys are defined:
-params has a value which is a kb_SPAdes.SPAdesParams
-use_defaults has a value which is an int
-
-
- -=end html - -=begin text - -a reference to a hash where the following keys are defined: -params has a value which is a kb_SPAdes.SPAdesParams -use_defaults has a value which is an int - - -=end text - -=back - - - =head2 MetaSPAdesEstimate =over 4 diff --git a/lib/kb_SPAdes/kb_SPAdesClient.py b/lib/kb_SPAdes/kb_SPAdesClient.py index d5a1b04..0f7a709 100644 --- a/lib/kb_SPAdes/kb_SPAdesClient.py +++ b/lib/kb_SPAdes/kb_SPAdesClient.py @@ -167,15 +167,10 @@ def run_metaSPAdes(self, params, context=None): def estimate_metaSPAdes_requirements(self, params, context=None): """ - :param params: instance of type "MetaSPAdesEstimatorParams" (params - - the params used to run metaSPAdes. use_defaults - (optional, def - 0) if 1, just return the default requirements use_heuristic - - (optional, def 1) if 1, only use a heuristic based on the reads - metadata to perform estimates) -> structure: parameter "params" of - type "SPAdesParams" (Input parameters for running SPAdes. - workspace_name - the name of the workspace from which to take - input and store output. output_contigset_name - the name of the - output contigset read_libraries - a list of Illumina + :param params: instance of type "SPAdesParams" (Input parameters for + running SPAdes. workspace_name - the name of the workspace from + which to take input and store output. output_contigset_name - the + name of the output contigset read_libraries - a list of Illumina PairedEndLibrary files in FASTQ or BAM format. dna_source - (optional) the source of the DNA used for sequencing 'single_cell': DNA amplified from a single cell via MDA anything @@ -195,7 +190,7 @@ def estimate_metaSPAdes_requirements(self, params, context=None): "dna_source" of String, parameter "min_contig_length" of Long, parameter "kmer_sizes" of list of Long, parameter "skip_error_correction" of type "bool" (A boolean. 0 = false, - anything else = true.), parameter "use_defaults" of Long + anything else = true.) :returns: instance of type "MetaSPAdesEstimate" (cpus - the number of CPUs required for the run memory - the minimal amount of memory in MB required for the run walltime - an estimate for walltime in diff --git a/lib/kb_SPAdes/kb_SPAdesImpl.py b/lib/kb_SPAdes/kb_SPAdesImpl.py index b0bba92..a4bb793 100644 --- a/lib/kb_SPAdes/kb_SPAdesImpl.py +++ b/lib/kb_SPAdes/kb_SPAdesImpl.py @@ -55,9 +55,9 @@ class kb_SPAdes: # state. A method could easily clobber the state set by another while # the latter method is running. ######################################### noqa - VERSION = "1.2.3" + VERSION = "1.3.0" GIT_URL = "https://github.com/briehl/kb_SPAdes" - GIT_COMMIT_HASH = "17687df25126cf201693f5686224b4777de36f0b" + GIT_COMMIT_HASH = "6f13deac910c9796ce8fc0d39075880f74c1ff4e" #BEGIN_CLASS_HEADER # Class variables and functions can be defined in this block @@ -821,15 +821,10 @@ def run_metaSPAdes(self, ctx, params): def estimate_metaSPAdes_requirements(self, ctx, params): """ - :param params: instance of type "MetaSPAdesEstimatorParams" (params - - the params used to run metaSPAdes. use_defaults - (optional, def - 0) if 1, just return the default requirements use_heuristic - - (optional, def 1) if 1, only use a heuristic based on the reads - metadata to perform estimates) -> structure: parameter "params" of - type "SPAdesParams" (Input parameters for running SPAdes. - workspace_name - the name of the workspace from which to take - input and store output. output_contigset_name - the name of the - output contigset read_libraries - a list of Illumina + :param params: instance of type "SPAdesParams" (Input parameters for + running SPAdes. workspace_name - the name of the workspace from + which to take input and store output. output_contigset_name - the + name of the output contigset read_libraries - a list of Illumina PairedEndLibrary files in FASTQ or BAM format. dna_source - (optional) the source of the DNA used for sequencing 'single_cell': DNA amplified from a single cell via MDA anything @@ -849,7 +844,7 @@ def estimate_metaSPAdes_requirements(self, ctx, params): "dna_source" of String, parameter "min_contig_length" of Long, parameter "kmer_sizes" of list of Long, parameter "skip_error_correction" of type "bool" (A boolean. 0 = false, - anything else = true.), parameter "use_defaults" of Long + anything else = true.) :returns: instance of type "MetaSPAdesEstimate" (cpus - the number of CPUs required for the run memory - the minimal amount of memory in MB required for the run walltime - an estimate for walltime in @@ -860,7 +855,7 @@ def estimate_metaSPAdes_requirements(self, ctx, params): # return variables are: results #BEGIN estimate_metaSPAdes_requirements ws = Workspace(self.workspaceURL, token=ctx["token"]) - results = estimate_metaSPAdes_reqs(params["params"], ws, use_defaults=params.get("use_defaults", 0)==1) + results = estimate_metaSPAdes_reqs(params, ws) #END estimate_metaSPAdes_requirements # At some point might do deeper type checking... diff --git a/lib/kb_SPAdes/utils/estimator.py b/lib/kb_SPAdes/utils/estimator.py index 0750de7..6458347 100644 --- a/lib/kb_SPAdes/utils/estimator.py +++ b/lib/kb_SPAdes/utils/estimator.py @@ -1,4 +1,4 @@ -def estimate_metaSPAdes_reqs(params, ws, use_defaults=False): +def estimate_metaSPAdes_reqs(params, ws): """ Generates an estimate of how much computational power is needed to run metaSPAdes. params: dict with keys (only relevant ones given): @@ -18,19 +18,13 @@ def estimate_metaSPAdes_reqs(params, ws, use_defaults=False): raise ValueError("workspace_name is required to estimate metaSPAdes requirements!") if len(params.get("read_libraries", [])) == 0: raise ValueError("At least one read library is required to estimate metaSPAdes requirements!") - if use_defaults: - return { - "cpus": 16, - "memory": 4096, - "walltime": 300 - } ws_name = params.get("workspace_name") reads_refs = [] for lib_name in params["read_libraries"]: reads_refs.append({"ref": lib_name if "/" in lib_name else ws_name + "/" + lib_name}) reads_infos = ws.get_object_info3({ - "objects": reads_refs, + "objects": reads_refs, "includeMetadata": 1 }) @@ -55,7 +49,7 @@ def estimate_metaSPAdes_reqs(params, ws, use_defaults=False): # now we have an approximation of how many kmers there are. we can use # that to guesstimate how much memory we need - predicted_mem = (total_kmers * 2.962e-08 + 16.3) * 1.1 * 1024 + predicted_mem = (total_kmers * 2.962e-08 + 16.3) * 1.1 * 1024 est = { "cpus": 16, @@ -63,4 +57,4 @@ def estimate_metaSPAdes_reqs(params, ws, use_defaults=False): "walltime": max(total_kmers/100000, 300) } - return est \ No newline at end of file + return est diff --git a/lib/src/us/kbase/kbspades/HybridSPAdesParams.java b/lib/src/us/kbase/kbspades/HybridSPAdesParams.java index d95b836..542345b 100644 --- a/lib/src/us/kbase/kbspades/HybridSPAdesParams.java +++ b/lib/src/us/kbase/kbspades/HybridSPAdesParams.java @@ -34,7 +34,7 @@ * (all values must be odd, less than 128 and listed in ascending order) * In the absence of these values, K values are automatically selected. * min_contig_length - integer to filter out contigs with length < min_contig_length - * from the HybridSPAdes output. Default value is 0 implying no filter. + * from the HybridSPAdes output. Default value is 0 implying no filter. * @optional dna_source * @optional pipeline_options * @optional kmer_sizes diff --git a/lib/src/us/kbase/kbspades/KbSPAdesClient.java b/lib/src/us/kbase/kbspades/KbSPAdesClient.java index e65ca53..f6be770 100644 --- a/lib/src/us/kbase/kbspades/KbSPAdesClient.java +++ b/lib/src/us/kbase/kbspades/KbSPAdesClient.java @@ -227,12 +227,12 @@ public SPAdesOutput runMetaSPAdes(SPAdesParams params, RpcContext... jsonRpcCont *

Original spec-file function name: estimate_metaSPAdes_requirements

*
      * 
- * @param params instance of type {@link us.kbase.kbspades.MetaSPAdesEstimatorParams MetaSPAdesEstimatorParams} + * @param params instance of type {@link us.kbase.kbspades.SPAdesParams SPAdesParams} * @return parameter "results" of type {@link us.kbase.kbspades.MetaSPAdesEstimate MetaSPAdesEstimate} * @throws IOException if an IO exception occurs * @throws JsonClientException if a JSON RPC exception occurs */ - public MetaSPAdesEstimate estimateMetaSPAdesRequirements(MetaSPAdesEstimatorParams params, RpcContext... jsonRpcContext) throws IOException, JsonClientException { + public MetaSPAdesEstimate estimateMetaSPAdesRequirements(SPAdesParams params, RpcContext... jsonRpcContext) throws IOException, JsonClientException { List args = new ArrayList(); args.add(params); TypeReference> retType = new TypeReference>() {}; diff --git a/lib/src/us/kbase/kbspades/MetaSPAdesEstimatorParams.java b/lib/src/us/kbase/kbspades/MetaSPAdesEstimatorParams.java deleted file mode 100644 index 9e498f9..0000000 --- a/lib/src/us/kbase/kbspades/MetaSPAdesEstimatorParams.java +++ /dev/null @@ -1,145 +0,0 @@ - -package us.kbase.kbspades; - -import java.util.HashMap; -import java.util.Map; -import javax.annotation.Generated; -import com.fasterxml.jackson.annotation.JsonAnyGetter; -import com.fasterxml.jackson.annotation.JsonAnySetter; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.annotation.JsonPropertyOrder; - - -/** - *

Original spec-file type: MetaSPAdesEstimatorParams

- *
- * params - the params used to run metaSPAdes.
- * use_defaults - (optional, def 0) if 1, just return the default requirements
- * use_heuristic - (optional, def 1) if 1, only use a heuristic based on the reads metadata to perform estimates
- * 
- * - */ -@JsonInclude(JsonInclude.Include.NON_NULL) -@Generated("com.googlecode.jsonschema2pojo") -@JsonPropertyOrder({ - "params", - "use_defaults" -}) -public class MetaSPAdesEstimatorParams { - - /** - *

Original spec-file type: SPAdesParams

- *
-     * Input parameters for running SPAdes.
-     * workspace_name - the name of the workspace from which to take input
-     *                  and store output.
-     * output_contigset_name - the name of the output contigset
-     * read_libraries - a list of Illumina PairedEndLibrary files in FASTQ or BAM format.
-     * dna_source - (optional) the source of the DNA used for sequencing 'single_cell': DNA
-     *                  amplified from a single cell via MDA anything else: Standard
-     *                  DNA sample from multiple cells. Default value is None.
-     * min_contig_length - (optional) integer to filter out contigs with length < min_contig_length
-     *                  from the SPAdes output. Default value is 0 implying no filter.
-     * kmer_sizes - (optional) K-mer sizes, Default values: 33, 55, 77, 99, 127
-     *                  (all values must be odd, less than 128 and listed in ascending order)
-     *                  In the absence of these values, K values are automatically selected.
-     * skip_error_correction - (optional) Assembly only (No error correction).
-     *                  By default this is disabled.
-     * 
- * - */ - @JsonProperty("params") - private SPAdesParams params; - @JsonProperty("use_defaults") - private Long useDefaults; - private Map additionalProperties = new HashMap(); - - /** - *

Original spec-file type: SPAdesParams

- *
-     * Input parameters for running SPAdes.
-     * workspace_name - the name of the workspace from which to take input
-     *                  and store output.
-     * output_contigset_name - the name of the output contigset
-     * read_libraries - a list of Illumina PairedEndLibrary files in FASTQ or BAM format.
-     * dna_source - (optional) the source of the DNA used for sequencing 'single_cell': DNA
-     *                  amplified from a single cell via MDA anything else: Standard
-     *                  DNA sample from multiple cells. Default value is None.
-     * min_contig_length - (optional) integer to filter out contigs with length < min_contig_length
-     *                  from the SPAdes output. Default value is 0 implying no filter.
-     * kmer_sizes - (optional) K-mer sizes, Default values: 33, 55, 77, 99, 127
-     *                  (all values must be odd, less than 128 and listed in ascending order)
-     *                  In the absence of these values, K values are automatically selected.
-     * skip_error_correction - (optional) Assembly only (No error correction).
-     *                  By default this is disabled.
-     * 
- * - */ - @JsonProperty("params") - public SPAdesParams getParams() { - return params; - } - - /** - *

Original spec-file type: SPAdesParams

- *
-     * Input parameters for running SPAdes.
-     * workspace_name - the name of the workspace from which to take input
-     *                  and store output.
-     * output_contigset_name - the name of the output contigset
-     * read_libraries - a list of Illumina PairedEndLibrary files in FASTQ or BAM format.
-     * dna_source - (optional) the source of the DNA used for sequencing 'single_cell': DNA
-     *                  amplified from a single cell via MDA anything else: Standard
-     *                  DNA sample from multiple cells. Default value is None.
-     * min_contig_length - (optional) integer to filter out contigs with length < min_contig_length
-     *                  from the SPAdes output. Default value is 0 implying no filter.
-     * kmer_sizes - (optional) K-mer sizes, Default values: 33, 55, 77, 99, 127
-     *                  (all values must be odd, less than 128 and listed in ascending order)
-     *                  In the absence of these values, K values are automatically selected.
-     * skip_error_correction - (optional) Assembly only (No error correction).
-     *                  By default this is disabled.
-     * 
- * - */ - @JsonProperty("params") - public void setParams(SPAdesParams params) { - this.params = params; - } - - public MetaSPAdesEstimatorParams withParams(SPAdesParams params) { - this.params = params; - return this; - } - - @JsonProperty("use_defaults") - public Long getUseDefaults() { - return useDefaults; - } - - @JsonProperty("use_defaults") - public void setUseDefaults(Long useDefaults) { - this.useDefaults = useDefaults; - } - - public MetaSPAdesEstimatorParams withUseDefaults(Long useDefaults) { - this.useDefaults = useDefaults; - return this; - } - - @JsonAnyGetter - public Map getAdditionalProperties() { - return this.additionalProperties; - } - - @JsonAnySetter - public void setAdditionalProperties(String name, Object value) { - this.additionalProperties.put(name, value); - } - - @Override - public String toString() { - return ((((((("MetaSPAdesEstimatorParams"+" [params=")+ params)+", useDefaults=")+ useDefaults)+", additionalProperties=")+ additionalProperties)+"]"); - } - -} diff --git a/test/spades_estimator_test.py b/test/spades_estimator_test.py index 19eb3d5..413dcd5 100644 --- a/test/spades_estimator_test.py +++ b/test/spades_estimator_test.py @@ -45,12 +45,12 @@ class SimpleMockWs(): def __init__(self, *args, **kwargs): self.ws_mapping = dict() self.skip_even_meta = kwargs.get("skip_even_meta", False) - + def get_object_info3(self, params): ret_val = { "infos": [], "paths": [] - } + } for o in params["objects"]: self._setup_ids(o) ret_val["infos"].append(self._object_info(o, with_meta=params.get("includeMetadata", 0)==1)) @@ -81,7 +81,7 @@ def _object_to_path(self, obj): str(self.ws_mapping[ws_name]['objects'][obj_name]), "1" ]) - + def _object_info(self, obj, with_meta=False): ws_name, obj_name = obj["ref"].split("/")[:2] ws_id = self.ws_mapping[ws_name]["id"] @@ -132,11 +132,6 @@ def test_estimator(self): self.assertEqual(est["cpus"], 16) self.assertEqual(est["walltime"], 300) self.assertEqual(est["memory"], 18453) - - def test_estimator_defaults(self): - ws = SimpleMockWs() - estimates = estimate_metaSPAdes_reqs(self.simple_params, ws, use_defaults=True) - self.assertEqual(estimates, {"cpus": 16, "memory": 4096, "walltime": 300}) def test_estimator_bad_inputs(self): ws = SimpleMockWs() @@ -158,23 +153,10 @@ def test_estimator_missing_meta(self): self.assertEqual(est["cpus"], 16) self.assertEqual(est["walltime"], 300) self.assertEqual(est["memory"], 18453) - + @mock.patch('kb_SPAdes.kb_SPAdesImpl.Workspace', SimpleMockWs) def test_estimator_impl_simple(self): - est = self.serviceImpl.estimate_metaSPAdes_requirements(self.ctx, { - "params": self.simple_params, - "use_defaults": 0 - })[0] + est = self.serviceImpl.estimate_metaSPAdes_requirements(self.ctx, self.simple_params)[0] self.assertEqual(est["cpus"], 16) self.assertEqual(est["walltime"], 300) self.assertEqual(est["memory"], 18453) - - @mock.patch('kb_SPAdes.kb_SPAdesImpl.Workspace', SimpleMockWs) - def test_estimator_impl_defaults(self): - est = self.serviceImpl.estimate_metaSPAdes_requirements(self.ctx, { - "params": self.simple_params, - "use_defaults": 1 - })[0] - self.assertEqual(est["cpus"], 16) - self.assertEqual(est["walltime"], 300) - self.assertEqual(est["memory"], 4096)