diff --git a/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/golden_data/golden_summary_report.csv b/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/golden_data/golden_summary_report.csv new file mode 100644 index 0000000000..0b3627f554 --- /dev/null +++ b/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/golden_data/golden_summary_report.csv @@ -0,0 +1,4 @@ +"MeasurementMethods","MinDate","NumPlaces","ScalingFactors","StatVar","Units","observationPeriods" +"[EurostatRegionalStatistics]","2013","2122","[]","MeanMothersAge_BirthEvent","[Year]","[]" +"[EurostatRegionalStatistics]","2013","2122","[]","MedianMothersAge_BirthEvent","[Year]","[]" +"[EurostatRegionalStatistics]","2013","2122","[]","FertilityRate_Person_Female","[]","[]" diff --git a/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/validation_config.json b/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/validation_config.json index 0fc58021af..a4d71bf128 100644 --- a/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/validation_config.json +++ b/scripts/eurostat/regional_statistics_by_nuts/fertility_rate_mother_age/validation_config.json @@ -6,7 +6,14 @@ "description": "Checks that the percentage of deleted points is within the threshold.", "validator": "DELETED_RECORDS_PERCENT", "params": { - "threshold": 0.01 + "threshold": 0.1 + } + }, + { + "rule_id": "check_goldens_summary_report", + "validator": "GOLDENS_CHECK", + "params": { + "golden_files": "golden_data/golden_summary_report.csv" } } ] diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py index f1364518e6..777336a165 100644 --- a/tools/import_validation/runner.py +++ b/tools/import_validation/runner.py @@ -14,6 +14,7 @@ """Module for the ValidationRunner class.""" import os +import logging from absl import app from absl import flags from absl import logging @@ -34,6 +35,55 @@ _FLAGS = flags.FLAGS +def _is_relative_local(path_val: str) -> bool: + """Checks if a path is a relative, local file path. + + This function identifies path strings that represent local relative files + (e.g., 'golden_data/un_wpp.csv') as opposed to absolute paths. It filters + out non-strings, empty strings, and absolute local paths. + + Args: + path_val: The file path string to evaluate. + + Returns: + True if the path represents a relative, local file path; False otherwise. + """ + if not isinstance(path_val, str) or not path_val: + return False + return not os.path.isabs(path_val) + + +def _find_base_dir(start_path: str, target_sub_path: str) -> str | None: + """Helper to find a base directory containing a target sub-path by walking up. + + Starting from the absolute directory of `start_path`, this function recursively + checks if `target_sub_path` exists in the current folder. If not, it walks up the + parent directory tree up to 10 levels. This is crucial for resolving paths relative + to import-specific golden directories when tests/validation are run from + different working directories (such as the repository root in CI/CD). + + Args: + start_path: The file or directory path to start the upward search from. + target_sub_path: The name of the subdirectory or file (e.g., 'golden_data') + to search for within the parent tree. + + Returns: + The absolute path of the directory containing `target_sub_path` if found, + or None if the root was reached or the 10-level limit was exceeded. + """ + if not start_path: + return None + curr = os.path.abspath(start_path) + for _ in range(8): # limit to 10 levels up + if os.path.exists(os.path.join(curr, target_sub_path)): + return curr + parent = os.path.dirname(curr) + if parent == curr: + break + curr = parent + return None + + class ValidationRunner: """ Orchestrates the validation process based on the new schema. @@ -41,6 +91,8 @@ class ValidationRunner: def __init__(self, validation_config_path: str, differ_output: str, stats_summary: str, lint_report: str, validation_output: str): + self.validation_config_path = validation_config_path + self.stats_summary = stats_summary self.config = ValidationConfig(validation_config_path) self.validation_output = validation_output self.validator = Validator() @@ -212,6 +264,50 @@ def run_validations(self) -> tuple[bool, list[ValidationResult]]: if output_dir: rule_params.setdefault('output_path', output_dir) + # Resolve paths relative to the directory of the validation config. + if validator_name == 'GOLDENS_CHECK': + config_dir = None + # Walk up from validation_config_path, self.stats_summary, or CWD to find where 'golden_data' lives + for start in [ + self.validation_config_path, self.stats_summary, + os.getcwd() + ]: + config_dir = _find_base_dir(start, 'golden_data') + if config_dir: + break + + if not config_dir: + config_dir = os.path.dirname( + os.path.abspath(self.validation_config_path)) + + print( + f"DEBUG: Found GOLDENS_CHECK rule: '{rule.get('rule_id')}'" + ) + print( + f"DEBUG: Config directory resolved to: '{config_dir}'") + for path_key in list(rule_params.keys()): + # Check any key in rule_params that equals 'golden_files' or 'input_files' or ends with '_file' or '_files' + if path_key in ( + 'golden_files', + 'input_files') or path_key.endswith( + '_file') or path_key.endswith('_files'): + val = rule_params[path_key] + print( + f"DEBUG: Before resolve '{path_key}': '{val}'") + if isinstance(val, str): + if _is_relative_local(val): + rule_params[path_key] = os.path.join( + config_dir, val) + elif isinstance(val, list): + rule_params[path_key] = [ + os.path.join(config_dir, item) + if _is_relative_local(item) else item + for item in val + ] + print( + f"DEBUG: After resolve '{path_key}': '{rule_params[path_key]}'" + ) + if validator_name == 'SQL_VALIDATOR': result = validation_func(self.data_sources['stats'], self.data_sources['differ'], diff --git a/tools/import_validation/validator_goldens.py b/tools/import_validation/validator_goldens.py index 7b19b783fe..4f8d8b8b32 100644 --- a/tools/import_validation/validator_goldens.py +++ b/tools/import_validation/validator_goldens.py @@ -68,6 +68,7 @@ --generate_goldens=goldens_data/generated_goldens.csv """ +import csv import os import sys import tempfile @@ -311,7 +312,6 @@ def load_nodes_from_file(files: str) -> dict: logging.info(f'Loaded {len(nodes)} nodes from {input_files}') return nodes - def generate_goldens(input_files: str, property_sets: list, output_file: str = None, @@ -440,9 +440,23 @@ def generate_goldens(input_files: str, if golden_nodes and output_file: logging.info(f'Writing {len(golden_nodes)} goldens to {output_file}') if file_util.file_is_csv(output_file): - file_util.file_write_csv_dict(golden_nodes, - output_file, - key_column_name=None) + headers = [] + for node in golden_nodes.values(): + for prop in node.keys(): + if prop not in headers: + headers.append(prop) + with file_util.FileIO(output_file, mode='w') as csvfile: + writer = csv.DictWriter( + csvfile, + fieldnames=headers, + escapechar='\\', + extrasaction='ignore', + quotechar='"', + quoting=csv.QUOTE_NONNUMERIC, + ) + writer.writeheader() + for node in golden_nodes.values(): + writer.writerow(node) else: mcf_file_util.write_mcf_nodes([golden_nodes], output_file)