datacommonsorg · SandeepTuniki · Apr 23, 2026 · Apr 23, 2026 · gemini-code-assist · Apr 23, 2026
diff --git a/tools/import_validation/runner.py b/tools/import_validation/runner.py
@@ -40,7 +40,8 @@ class ValidationRunner:
   """
 
     def __init__(self, validation_config_path: str, differ_output: str,
-                 stats_summary: str, lint_report: str, validation_output: str):
+                 stats_summary: str, lint_report: str, validation_output: str,
+                 counters_report: str = None):
         self.config = ValidationConfig(validation_config_path)
         self.validation_output = validation_output
         self.validator = Validator()
@@ -49,7 +50,8 @@ def __init__(self, validation_config_path: str, differ_output: str,
             'stats': pd.DataFrame(),
             'differ': pd.DataFrame(),
             'differ_summary': {},
-            'lint': {}
+            'lint': {},
+            'counters': {}
         }
 
         self.validation_dispatch = {
@@ -85,12 +87,22 @@ def __init__(self, validation_config_path: str, differ_output: str,
             'MAX_VALUE_CHECK':
                 (self.validator.validate_max_value_check, 'stats'),
             'GOLDENS_CHECK': (self.validator.validate_goldens, 'stats'),
+            'COUNTER_ZERO_CHECK':
+                (self.validator.validate_counter_zero, 'counters'),
+            'COUNTER_MAX_THRESHOLD':
+                (self.validator.validate_counter_max_threshold, 'counters'),
+            'COUNTER_RATIO_THRESHOLD':
+                (self.validator.validate_counter_ratio_threshold, 'counters'),
+            'COUNTER_SUM_INTEGRITY':
+                (self.validator.validate_counter_sum_integrity, 'counters'),
+            'COUNTER_MIN_YIELD':
+                (self.validator.validate_counter_min_yield, 'counters'),
         }
 
-        self._initialize_data_sources(stats_summary, lint_report, differ_output)
+        self._initialize_data_sources(stats_summary, lint_report, differ_output, counters_report)
 
     def _initialize_data_sources(self, stats_summary: str, lint_report: str,
-                                 differ_output: str):
+                                 differ_output: str, counters_report: str = None):
         """
     Checks for and loads the required data sources based on the config.
     """
@@ -167,6 +179,19 @@ def _initialize_data_sources(self, stats_summary: str, lint_report: str,
             logging.warning("lint_report file exists but is empty: %s",
                             lint_report)
 
+        if counters_report and os.path.exists(counters_report) and os.path.getsize(
+                counters_report) > 0:
+            try:
+                with open(counters_report, 'r') as f:
+                    self.data_sources['counters'] = json.load(f)
+            except Exception as e:
+                logging.error(
+                    f"JSON parse error while reading counters report at {counters_report}: {e}"
+                )
-            try:
-                with open(counters_report, 'r') as f:
-                    self.data_sources['counters'] = json.load(f)
-            except Exception as e:
-                logging.error(
-                    f"JSON parse error while reading counters report at {counters_report}: {e}"
-                )
+            try:
+                with open(counters_report, 'r') as f:
+                    self.data_sources['counters'] = json.load(f)
+            except (json.JSONDecodeError, OSError) as e:
+                raise ValueError(
+                    f"Failed to parse counters report at {counters_report}: {e}"
+                ) from e
-            except Exception as e:
-                logging.error(
-                    f"CSV parse error while reading counters report at {counters_report}: {e}"
-                )
+            except pd.errors.ParserError as e:
+                logging.error(
+                    f"CSV parse error while reading counters report at {counters_report}: {e}"
+                )
-            try:
-                with open(counters_report, 'r') as f:
-                    self.data_sources['counters'] = json.load(f)
-            except Exception as e:
-                logging.error(
-                    f"JSON parse error while reading counters report at {counters_report}: {e}"
-                )
+            try:
+                with open(counters_report, 'r') as f:
+                    self.data_sources['counters'] = json.load(f)
+            except (json.JSONDecodeError, OSError) as e:
+                raise ValueError(
+                    f"Failed to parse counters report at {counters_report}: {e}"
+                ) from e
-            except Exception as e:
-                logging.error(
-                    f"CSV parse error while reading counters report at {counters_report}: {e}"
-                )
+            except pd.errors.ParserError as e:
+                logging.error(
+                    f"CSV parse error while reading counters report at {counters_report}: {e}"
+                )
+        elif counters_report and os.path.exists(counters_report):
+            logging.warning("counters_report file exists but is empty: %s",
+                            counters_report)
+
     def _determine_required_sources(self) -> set[str]:
         """
     Parses the validation config to determine which data sources are required.
@@ -265,7 +290,8 @@ def main(_):
                                   differ_output=_FLAGS.differ_output,
                                   stats_summary=_FLAGS.stats_summary,
                                   lint_report=_FLAGS.lint_report,
-                                  validation_output=_FLAGS.validation_output)
+                                  validation_output=_FLAGS.validation_output,
+                                  counters_report=_FLAGS.counters_report)
         overall_status, _ = runner.run_validations()
         if not overall_status:
             sys.exit(1)
@@ -283,6 +309,8 @@ def main(_):
                         'Path to the stats summary report file.')
     flags.DEFINE_string('lint_report', None,
                         'Path to the mcf lint report file.')
+    flags.DEFINE_string('counters_report', None,
+                        'Path to the counters report file.')
     flags.DEFINE_string('validation_output', None,
                         'Path to the validation output file.')
     flags.mark_flag_as_required('validation_output')

diff --git a/tools/import_validation/runner_test.py b/tools/import_validation/runner_test.py
@@ -350,3 +350,57 @@ def test_init_raises_error_if_required_file_is_missing(self):
                 lint_report=self.report_path,
                 validation_output=self.output_path)
         self.assertIn("'stats' data source", str(context.exception))
+
+
+class TestCountersIntegration(unittest.TestCase):
+    '''Test Class for counter validations integration in ValidationRunner.'''
+
+    def setUp(self):
+        self.test_dir = tempfile.TemporaryDirectory()
+        self.config_path = os.path.join(self.test_dir.name, 'config.json')
+        self.stats_path = os.path.join(self.test_dir.name, 'stats.csv')
+        self.report_path = os.path.join(self.test_dir.name, 'report.json')
+        self.differ_path = os.path.join(self.test_dir.name, 'differ.csv')
+        self.output_path = os.path.join(self.test_dir.name, 'output.csv')
+        self.counters_path = os.path.join(self.test_dir.name, 'counters.json')
+
+    def tearDown(self):
+        self.test_dir.cleanup()
+
+    @patch('tools.import_validation.runner.Validator')
+    def test_runner_loads_counters_and_calls_validator(self, MockValidator):
+        # 1. Setup the mock
+        mock_validator_instance = MockValidator.return_value
+        mock_validator_instance.validate_counter_zero.return_value = ValidationResult(
+            ValidationStatus.PASSED, 'COUNTER_ZERO_CHECK')
+
+        # 2. Create test files
+        with open(self.config_path, 'w') as f:
+            json.dump(
+                {
+                    'rules': [{
+                        'rule_id': 'check_zero_errors',
+                        'validator': 'COUNTER_ZERO_CHECK',
+                        'params': {'counter_name': 'invalid-lat-lng'}
+                    }]
+                }, f)
+
+        counters_data = {'invalid-lat-lng': 0}
+        with open(self.counters_path, 'w') as f:
+            json.dump(counters_data, f)
+
+        # 3. Run the runner
+        runner = ValidationRunner(
+            validation_config_path=self.config_path,
+            stats_summary=self.stats_path,
+            differ_output=self.differ_path,
+            lint_report=self.report_path,
+            validation_output=self.output_path,
+            counters_report=self.counters_path)
+        runner.run_validations()
+
+        # 4. Assert that the correct method was called on the mock with loaded counters
+        mock_validator_instance.validate_counter_zero.assert_called_once()
+        call_args, _ = mock_validator_instance.validate_counter_zero.call_args
+        self.assertEqual(call_args[0]['invalid-lat-lng'], 0)
+
diff --git a/tools/import_validation/validator.py b/tools/import_validation/validator.py
@@ -1031,3 +1031,243 @@ def validate_goldens(self, df: pd.DataFrame,
                 ValidationStatus.DATA_ERROR,
                 'GOLDENS_CHECK',
                 message=f"Error during golden validation: {e}")
+
+    def validate_counter_zero(self, counters: dict,
+                              params: dict) -> ValidationResult:
+        """Checks that a specific counter is strictly zero.
+
+    Args:
+      counters: A dictionary containing the counter values.
+      params: A dictionary containing the validation parameters, which must
+        have a 'counter_name' key.
+
+    Returns:
+      A ValidationResult object.
+    """
+        if 'counter_name' not in params:
+            return ValidationResult(
+                ValidationStatus.CONFIG_ERROR,
+                'COUNTER_ZERO_CHECK',
+                message="Configuration error: 'counter_name' key not specified."
+            )
+
+        counter_name = params['counter_name']
+        value = counters.get(counter_name, 0)
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
+
+        if value > 0:
-        if value > 0:
+        if value != 0:
-        if value > 0:
+        if value != 0:
+            return ValidationResult(
+                ValidationStatus.FAILED,
+                'COUNTER_ZERO_CHECK',
+                message=f"Counter '{counter_name}' is {value}, expected 0.",
+                details={
+                    'counter_name': counter_name,
+                    'actual_value': value,
+                    'expected_value': 0
+                })
+
+        return ValidationResult(ValidationStatus.PASSED,
+                                'COUNTER_ZERO_CHECK',
+                                details={
+                                    'counter_name': counter_name,
+                                    'actual_value': value,
+                                    'expected_value': 0
+                                })
+
+    def validate_counter_max_threshold(self, counters: dict,
+                                       params: dict) -> ValidationResult:
+        """Checks that a specific counter value does not exceed a maximum threshold.
+
+    Args:
+      counters: A dictionary containing the counter values.
+      params: A dictionary containing the validation parameters, which must
+        have 'counter_name' and 'threshold' keys.
+
+    Returns:
+      A ValidationResult object.
+    """
+        if 'counter_name' not in params or 'threshold' not in params:
+            return ValidationResult(
+                ValidationStatus.CONFIG_ERROR,
+                'COUNTER_MAX_THRESHOLD',
+                message=
+                "Configuration error: 'counter_name' and 'threshold' must be specified."
+            )
+
+        counter_name = params['counter_name']
+        threshold = params['threshold']
+        value = counters.get(counter_name, 0)
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
+
+        if value > threshold:
+            return ValidationResult(
+                ValidationStatus.FAILED,
+                'COUNTER_MAX_THRESHOLD',
+                message=
+                f"Counter '{counter_name}' is {value}, which exceeds the threshold of {threshold}.",
+                details={
+                    'counter_name': counter_name,
+                    'actual_value': value,
+                    'threshold': threshold
+                })
+
+        return ValidationResult(ValidationStatus.PASSED,
+                                'COUNTER_MAX_THRESHOLD',
+                                details={
+                                    'counter_name': counter_name,
+                                    'actual_value': value,
+                                    'threshold': threshold
+                                })
+
+    def validate_counter_ratio_threshold(self, counters: dict,
+                                         params: dict) -> ValidationResult:
+        """Checks that the ratio of two counters (percentage) does not exceed a threshold.
+
+    Args:
+      counters: A dictionary containing the counter values.
+      params: A dictionary containing the validation parameters, which must
+        have 'subset_counter', 'total_counter', and 'threshold_percent' keys.
+
+    Returns:
+      A ValidationResult object.
+    """
+        if 'subset_counter' not in params or 'total_counter' not in params or 'threshold_percent' not in params:
+            return ValidationResult(
+                ValidationStatus.CONFIG_ERROR,
+                'COUNTER_RATIO_THRESHOLD',
+                message=
+                "Configuration error: 'subset_counter', 'total_counter', and 'threshold_percent' must be specified."
+            )
+
+        subset_counter = params['subset_counter']
+        total_counter = params['total_counter']
+        threshold_percent = params['threshold_percent']
+
+        subset_value = counters.get(subset_counter, 0)
+        total_value = counters.get(total_counter, 0)
-        subset_value = counters.get(subset_counter, 0)
-        total_value = counters.get(total_counter, 0)
+        subset_value = float(counters[subset_counter])
+        total_value = float(counters[total_counter])
-        subset_value = counters.get(subset_counter, 0)
-        total_value = counters.get(total_counter, 0)
+        subset_value = float(counters[subset_counter])
+        total_value = float(counters[total_counter])
+
+        if total_value == 0:
+            if subset_value > 0:
+                percent = 100.0
+            else:
+                percent = 0.0
+        else:
+            percent = (subset_value / total_value) * 100
+
+        if percent > threshold_percent:
+            return ValidationResult(
+                ValidationStatus.FAILED,
+                'COUNTER_RATIO_THRESHOLD',
+                message=
+                f"Ratio of '{subset_counter}' to '{total_counter}' is {percent:.2f}%, which exceeds the threshold of {threshold_percent}%.",
+                details={
+                    'subset_counter': subset_counter,
+                    'subset_value': subset_value,
+                    'total_counter': total_counter,
+                    'total_value': total_value,
+                    'percent': percent,
+                    'threshold_percent': threshold_percent
+                })
+
+        return ValidationResult(ValidationStatus.PASSED,
+                                'COUNTER_RATIO_THRESHOLD',
+                                details={
+                                    'subset_counter': subset_counter,
+                                    'subset_value': subset_value,
+                                    'total_counter': total_counter,
+                                    'total_value': total_value,
+                                    'percent': percent,
+                                    'threshold_percent': threshold_percent
+                                })
+
+    def validate_counter_sum_integrity(self, counters: dict,
+                                       params: dict) -> ValidationResult:
+        """Checks that the sum of constituent counters equals a total counter.
+
+    Args:
+      counters: A dictionary containing the counter values.
+      params: A dictionary containing the validation parameters, which must
+        have 'total_counter' and 'constituent_counters' keys.
+
+    Returns:
+      A ValidationResult object.
+    """
+        if 'total_counter' not in params or 'constituent_counters' not in params:
+            return ValidationResult(
+                ValidationStatus.CONFIG_ERROR,
+                'COUNTER_SUM_INTEGRITY',
+                message=
+                "Configuration error: 'total_counter' and 'constituent_counters' must be specified."
+            )
+
+        total_counter = params['total_counter']
+        constituent_counters = params['constituent_counters']
+
+        total_value = counters.get(total_counter, 0)
+        constituent_sum = sum(
+            counters.get(c, 0) for c in constituent_counters)
+
-        constituent_sum = sum(
-            counters.get(c, 0) for c in constituent_counters)
+        total_value = float(counters[total_counter])
+        constituent_sum = sum(
+            float(counters[c]) for c in constituent_counters)
-        constituent_sum = sum(
-            counters.get(c, 0) for c in constituent_counters)
+        total_value = float(counters[total_counter])
+        constituent_sum = sum(
+            float(counters[c]) for c in constituent_counters)
+        if total_value != constituent_sum:
+            return ValidationResult(
+                ValidationStatus.FAILED,
+                'COUNTER_SUM_INTEGRITY',
+                message=
+                f"Sum of constituent counters ({constituent_sum}) does not equal total counter '{total_counter}' ({total_value}).",
+                details={
+                    'total_counter': total_counter,
+                    'total_value': total_value,
+                    'constituent_counters': constituent_counters,
+                    'constituent_sum': constituent_sum
+                })
+
+        return ValidationResult(ValidationStatus.PASSED,
+                                'COUNTER_SUM_INTEGRITY',
+                                details={
+                                    'total_counter': total_counter,
+                                    'total_value': total_value,
+                                    'constituent_counters': constituent_counters,
+                                    'constituent_sum': constituent_sum
+                                })
+
+    def validate_counter_min_yield(self, counters: dict,
+                                   params: dict) -> ValidationResult:
+        """Checks that a specific counter is above a minimum yield.
+
+    Args:
+      counters: A dictionary containing the counter values.
+      params: A dictionary containing the validation parameters, which must
+        have 'counter_name' and 'min_yield' keys.
+
+    Returns:
+      A ValidationResult object.
+    """
+        if 'counter_name' not in params or 'min_yield' not in params:
+            return ValidationResult(
+                ValidationStatus.CONFIG_ERROR,
+                'COUNTER_MIN_YIELD',
+                message=
+                "Configuration error: 'counter_name' and 'min_yield' must be specified."
+            )
+
+        counter_name = params['counter_name']
+        min_yield = params['min_yield']
+        value = counters.get(counter_name, 0)
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
-        value = counters.get(counter_name, 0)
+        value = float(counters[counter_name])
+
+        if value < min_yield:
+            return ValidationResult(
+                ValidationStatus.FAILED,
+                'COUNTER_MIN_YIELD',
+                message=
+                f"Counter '{counter_name}' is {value}, which is below the minimum yield of {min_yield}.",
+                details={
+                    'counter_name': counter_name,
+                    'actual_value': value,
+                    'min_yield': min_yield
+                })
+
+        return ValidationResult(ValidationStatus.PASSED,
+                                'COUNTER_MIN_YIELD',
+                                details={
+                                    'counter_name': counter_name,
+                                    'actual_value': value,
+                                    'min_yield': min_yield
+                                })
+