diff --git a/lib/galaxy/tool_util/lint.py b/lib/galaxy/tool_util/lint.py index 3d2796a2927e..d5535022f61e 100644 --- a/lib/galaxy/tool_util/lint.py +++ b/lib/galaxy/tool_util/lint.py @@ -373,7 +373,7 @@ def lint_tool_source_with_modules(lint_context: LintContext, tool_source, linter for module in linter_modules: module_name = module.__name__.split(".")[-1] - lint_tool_types = getattr(module, "lint_tool_types", ["default", "manage_data"]) + lint_tool_types = getattr(module, "lint_tool_types", ["default", "manage_data", "expression"]) if not ("*" in lint_tool_types or tool_type in lint_tool_types): continue diff --git a/lib/galaxy/tool_util/linters/_util.py b/lib/galaxy/tool_util/linters/_util.py index 84ee89d1d981..7cc354577e3e 100644 --- a/lib/galaxy/tool_util/linters/_util.py +++ b/lib/galaxy/tool_util/linters/_util.py @@ -1,5 +1,60 @@ import re +from Cheetah.Compiler import Compiler +from Cheetah.Template import Template + +from galaxy.util import unicodify + + +def get_code(tool_xml): + """get code used in the Galaxy tool""" + # get code from command and configfiles + code = "" + for tag in [".//command", ".//configfile"]: + for cn in tool_xml.findall(tag): + code += cn.text + # TODO not sure about this line, I get UnicodeError for some tools otherwise (trinity) + code = "#encoding utf-8\n" + code + code = Template.compile(source=code, compilerClass=Compiler, returnAClass=False) + code = unicodify(code) + + expression_nodes = tool_xml.findall(".//expression") + expression = "" + if len(expression_nodes) > 0: + expression = expression_nodes[0].text + + # template macros + # note: not necessary (& possible) for macro tokens which are expanded + # during loading the xml (and removed from the macros tag) + templatecode = "" + for cn in tool_xml.findall('.//macros/macro[@type="template"]'): + templatecode += cn.text + templatecode = "#encoding utf-8\n" + templatecode + templatecode = Template.compile(source=templatecode, compilerClass=Compiler, returnAClass=False) + templatecode = unicodify(templatecode) + + # get code from output filters (which use variables wo $) + filtercode = "" + for cn in tool_xml.findall("./outputs/*/filter"): + filtercode += cn.text + "\n" + + # get output labels which might contain code (which use variables like ${var}) + labelcode = "" + for cn in tool_xml.findall("./outputs/*[@label]"): + labelcode += cn.attrib["label"] + "\n" + + actioncode = "" + for cn in tool_xml.findall("./outputs//conditional[@name]"): + actioncode += cn.attrib["name"] + "\n" + for cn in tool_xml.findall('./outputs//action/option[@type="from_param"]'): + actioncode += cn.attrib.get("name", "") + "\n" + for cn in tool_xml.findall("./outputs//action/option/filter[@ref]"): + actioncode += cn.attrib["ref"] + "\n" + for cn in tool_xml.findall("./outputs//action[@default]"): + actioncode += cn.attrib["default"] + "\n" + + return code, expression, templatecode, filtercode, labelcode, actioncode + def is_datasource(tool_xml): """Returns true if the tool is a datasource tool""" diff --git a/lib/galaxy/tool_util/linters/command.py b/lib/galaxy/tool_util/linters/command.py index eff900341cb1..ec5d9c4611b2 100644 --- a/lib/galaxy/tool_util/linters/command.py +++ b/lib/galaxy/tool_util/linters/command.py @@ -16,6 +16,8 @@ class CommandMissing(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + if tool_source.parse_tool_type() == "expression": + return tool_xml = getattr(tool_source, "xml_tree", None) if not tool_xml: return diff --git a/lib/galaxy/tool_util/linters/inputs_used.py b/lib/galaxy/tool_util/linters/inputs_used.py new file mode 100644 index 000000000000..911e2c0f46ba --- /dev/null +++ b/lib/galaxy/tool_util/linters/inputs_used.py @@ -0,0 +1,225 @@ +import re +from typing import TYPE_CHECKING + +import esprima +from Cheetah.Parser import ParseError + +from galaxy.tool_util.lint import Linter +from ._util import ( + get_code, + is_valid_cheetah_placeholder, +) +from ..parser.util import _parse_name + +if TYPE_CHECKING: + from galaxy.tool_util.lint import LintContext + from galaxy.tool_util.parser import ToolSource + + +def _param_in_compiled_cheetah(param_name: str, code: str) -> bool: + # # accession $PATH.param_name.ATTRIBUTES in cheetah gives + # # VFFSL(SL,"PATH.param_name.ATTRIBUTES",True) + # # for PATH and ATTRIBUTES we assume simply ([\w.]+)? + # # since if wrong it will be discovered in a test + # if re.search(r'VFFSL\(SL,[\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'],True\)', code): + # return True + + # # print("checking path") + # # accessing $PATH.param_name['ATTRIBUTE'] (ATTRIBUTE eg reverse) + # # or $PATH.param_name['ATTRIBUTE'].FURTHER_ATTRIBUTE gives + # # VFN(VFN(VFFSL(SL,"PATH",True),"param_name",True)['ATTRIBUTE'],"FURTHER_ATTRIBUTE",True) + # # we simply search VFN(VFFSL(SL,"PATH",True),"param_name",True) + # # ie ignore the ATTRIBUTES part and for PATH we assume again ([\w.]+)? + # if re.search(r'VFN\(VFFSL\(SL,[\"\'][\w.]+[\"\'],True\),[\"\']' + param_name + r'[\"\'],True\)', code): + # return True + + # # all these are covered by the rawExpr regular expression below + # # - $getVar("param_name") + # # gets + # # _v = VFFSL(SL,"getVar",False)("param_name") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("param_name")')) + # # - $getVar("getvar_default", "default") + # # gets + # # _v = VFFSL(SL,"getVar",False)("getvar_default", "default") + # # if _v is not None: write(_filter(_v, rawExpr='$getVar("getvar_default", "default")')) + # if re.search(r'VFFSL\(SL,[\"\']getVar[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\"\'](, [^()]+)?\)', code): + # return True + + # # - $varExists("varexists") + # # gets + # # _v = VFFSL(SL,"varExists",False)("varexists") + # # if _v is not None: write(_filter(_v, rawExpr='$varExists("varexists")')) + # # - $hasVar("hasvar") + # # gets + # # _v = VFFSL(SL,"hasVar",False)("hasvar") + # # if _v is not None: write(_filter(_v, rawExpr='$hasVar("hasvar")')) + # if re.search(r'VFFSL\(SL,[\"\'](varExists|hasvar)[\"\'],False\)\([\"\']([\w.]+\.)?' + param_name + r'(\.[\w.]+)?[\'\"]\)', code): + # return True + + # # Also the following is possible (TODO but we could forbid it) + # # $PATH["param_name"].ATTRIBUTES + # # VFFSL(SL,"PATH",True)["param_name"].ATTRIBUTES + # if re.search(r'VFFSL\(SL,[\"\'][\w.]+[\"\'],True\)\[[\"\']' + param_name + r'[\"\']\]', code): + # return True + # gets + # set $rg_id = str($rg_param('read_group_id_conditional').ID) + # rg_id = str(VFN(VFFSL(SL,"rg_param",False)('read_group_id_conditional'),"ID",True)) + if re.search(r"(VFN|VFFSL)\(.*[\"\']([\w.]+\.)?" + param_name + r"(\.[\w.]+)?[\"\']", code): + return True + + # #for i, r in enumerate($repeat_name) + # #set x = $str(r[ 'param_name' ]) + # #end for + # the loop content gets + # x = VFFSL(SL,"str",False)(r[ 'var_in_repeat' ]) + if re.search(r"(VFFSL|VFN)\(.*\[\s*[\"\']" + param_name + r"[\"\']\s*\]", code): + # print(f" G") + return True + + # "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + # gets + # _v = ",".join( [ str( r.var_in_repeat3 ) for r in VFFSL(SL,"repeat_name",True) ] ) + # if _v is not None: write(_filter(_v, rawExpr='${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }')) + if re.search(r"rawExpr=([\"\'])(.*[^\w])?" + param_name + r"([^\w].*)?(\1)", code): + return True + + # print("FALSE") + return False + + +def _param_in_expression(param_name: str, expression: str) -> bool: + try: + syntax_tree = esprima.parseScript(expression, tolerant=True) + except Exception as e: + return False + + def check_node(node): + if hasattr(node, "__dict__"): + if getattr(node, "type", None) == "Identifier" and getattr(node, "name", None) == param_name: + return True + for key, value in vars(node).items(): + if isinstance(value, (list, esprima.nodes.Node)) and check_node(value): + return True + elif isinstance(node, list): + for item in node: + if check_node(item): + return True + return False + + return check_node(syntax_tree.body) + + + +class InputsUsed(Linter): + """ + check if the parameter is used somewhere, the following cases are considered: + - in the command or a configfile + - in an output filter + - if it is the select of conditional + - if there is an inputs configfile that is used + - for data parameters data_style needs to be set for the config file + - for other parameters the name of the config file must be used in the code + + a warning is shown if the parameter is used only in + - template macro, + - output action, or + - label of an output + + otherwise + - if the parameter only appears in change_format + - or if none of the previous cases apply + """ + + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return + + try: + code, template_code, filter_code, label_code, action_code = get_code(tool_xml) + except ParseError as pe: + lint_ctx.error(f"Invalid cheetah found{pe}", linter=cls.name(), node=tool_xml.getroot()) + return + + inputs = tool_xml.findall("./inputs//param") + for param in inputs: + try: + param_name = _parse_name(param.attrib.get("name"), param.attrib.get("argument")) + except ValueError: + continue + if not is_valid_cheetah_placeholder(param_name): + continue + if _param_in_compiled_cheetah(param, param_name, code): + continue + if _param_in_expression(param_name, expression): + continue + if param_name in filter_code: + continue + # TODO We need to check if the referenced parameter is the current one + if ( + tool_xml.find("./inputs//param/options[@from_dataset]") is not None + or tool_xml.find("./inputs//param/options/filter[@ref]") is not None + ): + continue + if param.getparent().tag == "conditional": + continue + + conf_inputs = tool_xml.find("./configfiles/inputs") + if conf_inputs is not None: # in this it's really hard to know + param_type = param.attrib.get("type") + if param_type in ["data", "collection"]: + if not conf_inputs.attrib.get("data_style"): + lint_ctx.error( + f"Param input [{param_name}] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?", + linter=cls.name(), + node=param, + ) + inputs_conf_name = conf_inputs.attrib.get("name", conf_inputs.attrib.get("filename", None)) + if inputs_conf_name: + if not re.search(r"(^|[^\w])" + inputs_conf_name + r"([^\w]|$)", code): + lint_ctx.error( + f"Param input [{param_name}] only used inputs configfile {inputs_conf_name}, but this is not used in the command", + linter=cls.name(), + node=param, + ) + continue + + change_format = tool_xml.find(f"./outputs//change_format/when[@input='{param_name}']") is not None + template = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", template_code) is not None + action = re.search(r"(^|[^\w])" + param_name + r"([^\w]|$)", action_code) is not None + label = re.search(r"[^\w]" + param_name + r"([^\w]|$)", label_code) is not None + if template or action or label: + if template + action + label == 1: + only = "only " + else: + only = "" + # TODO check that template is used?? + if template: + lint_ctx.warn( + f"Param input [{param_name}] {only}used in a template macro, use a macro token instead.", + linter=cls.name(), + node=param, + ) + if action: + lint_ctx.warn( + f"Param input [{param_name}] {only}found in output actions, better use extended metadata.", + linter=cls.name(), + node=param, + ) + if label: + lint_ctx.warn( + f"Param input [{param_name}] {only}found in a label attribute, this is discouraged.", + linter=cls.name(), + node=param, + ) + continue + + if change_format: + lint_ctx.error( + f"Param input [{param_name}] is only used in a change_format tag", linter=cls.name(), node=param + ) + else: + lint_ctx.error( + f"Param input [{param_name}] not found in command or configfiles.", linter=cls.name(), node=param + ) diff --git a/lib/galaxy/tool_util/parser/util.py b/lib/galaxy/tool_util/parser/util.py index 0dbc476ef0c3..d1ea5b399b98 100644 --- a/lib/galaxy/tool_util/parser/util.py +++ b/lib/galaxy/tool_util/parser/util.py @@ -32,6 +32,12 @@ def is_dict(item): return isinstance(item, dict) or isinstance(item, OrderedDict) +def _prepare_argument(argument): + if argument is None: + return "" + return argument.lstrip("-").replace("-", "_") + + def _parse_name(name, argument): """Determine name of an input source from name and argument returns the name or if absent the argument property @@ -41,7 +47,7 @@ def _parse_name(name, argument): if name is None: if argument is None: raise ValueError("parameter must specify a 'name' or 'argument'.") - name = argument.lstrip("-").replace("-", "_") + name = _prepare_argument(argument) return name diff --git a/packages/tool_util/setup.cfg b/packages/tool_util/setup.cfg index 9f087d25d9f0..b3f0ced15665 100644 --- a/packages/tool_util/setup.cfg +++ b/packages/tool_util/setup.cfg @@ -34,7 +34,8 @@ version = 25.1.dev0 include_package_data = True install_requires = galaxy-tool-util-models - galaxy-util[image-util]>=22.1 + galaxy-util[image_util]>=22.1 + Cheetah3 conda-package-streaming lxml!=4.2.2 MarkupSafe diff --git a/test/unit/tool_util/test_tool_linters.py b/test/unit/tool_util/test_tool_linters.py index 7c25524bb391..2922cfbc43ea 100644 --- a/test/unit/tool_util/test_tool_linters.py +++ b/test/unit/tool_util/test_tool_linters.py @@ -192,6 +192,7 @@ INPUTS_VALID = """ + $txt_param $int_param @@ -201,6 +202,10 @@ INPUTS_PARAM_NAME = """ + + $param_name + $valid + @@ -213,6 +218,7 @@ INPUTS_PARAM_TYPE = """ + $valid_name $another_valid_name @@ -222,6 +228,7 @@ INPUTS_DATA_PARAM = """ + $valid_name @@ -230,6 +237,7 @@ INPUTS_DATA_PARAM_OPTIONS = """ + @@ -243,6 +251,7 @@ INPUTS_DATA_PARAM_OPTIONS_FILTER_ATTRIBUTE = """ + @@ -256,6 +265,7 @@ INPUTS_DATA_PARAM_INVALIDOPTIONS = """ + $valid_name @@ -267,6 +277,7 @@ """ + INPUTS_BOOLEAN_PARAM_SWAPPED_LABELS = """ @@ -277,6 +288,7 @@ INPUTS_BOOLEAN_PARAM_DUPLICATE_LABELS = """ + $valid_name @@ -285,6 +297,7 @@ INPUTS_CONDITIONAL = """ + @@ -340,6 +353,11 @@ INPUTS_SELECT_INCOMPATIBLE_DISPLAY = """ + + $radio_select + $checkboxes_select + $checkboxes_select_correct + @@ -360,6 +378,7 @@ INPUTS_SELECT_DUPLICATED_OPTIONS = """ + $select @@ -371,6 +390,7 @@ SELECT_DUPLICATED_OPTIONS_WITH_DIFF_SELECTED = """ + $select @@ -382,6 +402,7 @@ INPUTS_SELECT_DEPRECATIONS = """ + $select_do $select_ff $select_fp @@ -396,6 +417,7 @@ INPUTS_SELECT_OPTION_DEFINITIONS = """ + @@ -422,6 +444,7 @@ INPUTS_SELECT_FILTER = """ + $select_filter_types @@ -435,6 +458,7 @@ INPUTS_VALIDATOR_INCOMPATIBILITIES = """ + $param_name $another_param_name TEXT @@ -455,6 +479,7 @@ INPUTS_VALIDATOR_CORRECT = """ + $data_param $collection_param $text_param $select_param $int_param @@ -500,6 +525,7 @@ INPUTS_TYPE_CHILD_COMBINATIONS = """ + $text_param $select_param $data_param $int_param @@ -525,6 +551,7 @@ INPUTS_DUPLICATE_NAMES = """ + $dup $sec.dup_in_section $cond.dup_in_cond $dup_in_output @@ -551,8 +578,10 @@ """ + INPUTS_FILTER_INCORRECT = """ + @@ -571,6 +600,7 @@ INPUTS_FILTER_CORRECT = """ + @@ -594,6 +624,188 @@ """ +# tests for many different cheetah placeholder syntax possibilities +# that should pass and two that should give an error +INPUTS_USED_PARAMETER_IN_COMMAND = """ + + + $simple1 + ${ simple2 } + ## Variable in repeat + #for i, r in enumerate($repeat_name) + #set x = $str(r[ 'var_in_repeat' ]) + ${r.var_in_repeat2} + #end for + "${ ",".join( [ str( r.var_in_repeat3 ) for r in $repeat_name ] ) }" + $sec.paired_collection1.forward.ext + $sec["paired_collection3"].forward.ext + $sec.sec2.paired_collection2['reverse'].ext + $getVar("getvar") + $getVar("getvar_default", "default") + $hasVar("hasvar") + $varExists("varexists") + ## check that comments are considered + ## $hey_a_missing_parameter + also comments at the end of lines are ignored ## $hey_a_missing_parameter + #* + $hey_a_missing_parameter + *# + #* $hey_a_missing_parameter *# + $same_param_in_both_whens + + + $param_in_config_file + ]]> + + + + + + + + + +
+ + +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + data_param_filter + + + + + + + + + + + + collection_param_filter + + + + + + + +
+""" + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS = """ + + + do something with $inputs ## but note, the linter does not check if inputs is really used + + + + + + + + + +""" + +INPUTS_USED_OTHER = """ + + + + + + #include source=$template_token# + + + $param_in_config_file + ]]> + + + + + + + + + + + + + data_param_filter + + + + + + + + + + + + +""" + +INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2 = """ + + + do nothing with the configfile + + + + + + + + +""" + +# test case checking if syntax errors are reported +INPUTS_USED_PARAMETER_SYNTAX_ERROR = """ + + + #if $param + this is unfinished if + + + + +""" + # test tool xml for outputs linter OUTPUTS_MISSING = """ @@ -713,6 +925,7 @@ # tool xml for repeats linter REPEATS = """ + $another_param_name @@ -1739,6 +1952,89 @@ def test_inputs_repeats(lint_ctx): assert len(lint_ctx.error_messages) == 2 +def test_inputs_used_parameter_in_command(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND) + run_lint_module(lint_ctx, inputs, tool_source) + assert "Found 20 input parameters." in lint_ctx.info_messages + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert "Param input [hey_a_missing_parameter] not found in command or configfiles." in lint_ctx.error_messages + assert "Param input [change_format_parameter] is only used in a change_format tag" in lint_ctx.error_messages + assert len(lint_ctx.error_messages) == 2 + + +def test_inputs_used_other(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_OTHER) + run_lint_module(lint_ctx, inputs, tool_source) + assert "Found 5 input parameters." in lint_ctx.info_messages + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert ( + "Param input [variable_in_template_token] only used in a template macro, use a macro token instead." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_option_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_filter_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [action_conditional_reference] only found in output actions, better use extended metadata." + in lint_ctx.warn_messages + ) + assert ( + "Param input [param_that_is_used_only_in_output_label] only found in a label attribute, this is discouraged." + in lint_ctx.warn_messages + ) + assert len(lint_ctx.warn_messages) == 5 + assert len(lint_ctx.error_messages) == 0 + + +def test_inputs_used_parameter_in_command_with_inputs(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS) + run_lint_module(lint_ctx, inputs, tool_source) + assert "Found 2 input parameters." in lint_ctx.info_messages + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + "Param input [param_that_is_not_in_inputs_configfile] not found in command or configfiles. Does the present inputs config miss the 'data_style' attribute?" + in lint_ctx.error_messages + ) + assert len(lint_ctx.error_messages) == 1 + + +def test_inputs_used_parameter_in_command_with_inputs2(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_IN_COMMAND_WITH_INPUTS2) + run_lint_module(lint_ctx, inputs, tool_source) + assert "Found 1 input parameters." in lint_ctx.info_messages + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + "Param input [param_that_is_not_in_inputs_configfile] only used inputs configfile inputs, but this is not used in the command" + in lint_ctx.error_messages + ) + assert len(lint_ctx.error_messages) == 1 + + +def test_inputs_used_parameter_syntax_error(lint_ctx): + tool_source = get_xml_tool_source(INPUTS_USED_PARAMETER_SYNTAX_ERROR) + run_lint_module(lint_ctx, inputs, tool_source) + assert "Found 1 input parameters." in lint_ctx.info_messages + assert len(lint_ctx.info_messages) == 1 + assert not lint_ctx.valid_messages + assert not lint_ctx.warn_messages + assert ( + "Invalid cheetah found\n\nSome #directives are missing their corresponding #end ___ tag: if\nLine 4, column 33\n\nLine|Cheetah Code\n----|-------------------------------------------------------------\n2 |\n3 | #if $param\n4 | this is unfinished if\n ^\n" + in lint_ctx.error_messages + ) + + def test_outputs_missing(lint_ctx): tool_source = get_xml_tool_source(OUTPUTS_MISSING) run_lint_module(lint_ctx, output, tool_source) @@ -2210,6 +2506,7 @@ def test_valid_datatypes(lint_ctx): DATA_MANAGER = """ + $select @@ -2233,10 +2530,9 @@ def test_data_manager(lint_ctx_xpath, lint_ctx): assert "No citations found, consider adding citations to your tool." in lint_ctx.warn_messages assert "Select parameter [select] has multiple options with the same text content" in lint_ctx.error_messages assert "Select parameter [select] has multiple options with the same value" in lint_ctx.error_messages - assert "No command tag found, must specify a command template to execute." in lint_ctx.error_messages assert lint_ctx.valid_messages assert len(lint_ctx.warn_messages) == 4 - assert len(lint_ctx.error_messages) == 3 + assert len(lint_ctx.error_messages) == 2 COMPLETE = """ @@ -2424,7 +2720,7 @@ def test_skip_by_module(lint_ctx): def test_list_linters(): linter_names = Linter.list_listers() # make sure to add/remove a test for new/removed linters if this number changes - assert len(linter_names) == 142 + assert len(linter_names) == 143 assert "Linter" not in linter_names # make sure that linters from all modules are available for prefix in [