Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 31 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,39 @@ Raises `ObjectNotFound` (HTTP 404) if no validation has been run for the resourc

## Config settings

None at present
### `ckanext.validate.fail_on_invalid_upload`

**TODO:** Document any optional config settings here. For example:
Controls whether CSV files are validated **synchronously** during resource
create and update. When enabled, invalid files are rejected before being
saved, and the user receives a `ValidationError` with the details.

# The minimum number of hours to wait before re-checking a resource
# (optional, default: 24).
ckanext.validate.some_setting = some_default_value
| | |
|---|---|
| **Type** | boolean |
| **Default** | `false` |

```ini
# Reject invalid CSV files on upload (optional, default: false).
ckanext.validate.fail_on_invalid_upload = false
```

**When `true`:**

* Uploaded CSV files are validated with Frictionless during
`resource_create` and `resource_update`.
* If the file is invalid, a `ValidationError` is raised and the resource is
**not** saved.
* The user receives a clear message listing the validation errors.
* Only applies to file uploads — URL-linked resources and metadata-only
updates are unaffected.

**When `false` (default):**

* The existing behaviour is preserved: uploads are always accepted and
validated asynchronously in the background via the job queue.

> **Note:** the default is `false` to preserve backwards compatibility.
> Enable this option only when your instance requires a strict upload policy.


## Developer installation
Expand Down
2 changes: 2 additions & 0 deletions ckanext/validate/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@ def before_resource_show(self, resource_dict):
return resource_dict

def before_resource_create(self, context, resource):
resource_hooks.validate_csv_upload_strict(resource)
return resource

def before_resource_update(self, context, current, resource):
resource_hooks.validate_csv_upload_strict(resource)
return resource

def before_resource_delete(self, context, resource, resources):
Expand Down
83 changes: 83 additions & 0 deletions ckanext/validate/resource_hooks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@

import logging
import os
import tempfile

import ckan.plugins.toolkit as toolkit
from frictionless import Resource as FrictionlessResource, system

from ckanext.validate import jobs

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -110,3 +115,81 @@ def handle_resource_change(context, resource_dict, operation):
)

return True


# ---------------------------------------------------------------------------
# Strict-mode: synchronous validation before create/update
# ---------------------------------------------------------------------------

def is_fail_on_invalid_upload_enabled():
"""Return True when the sysadmin has enabled strict CSV-upload validation."""
return toolkit.asbool(
toolkit.config.get("ckanext.validate.fail_on_invalid_upload", False)
)


def validate_csv_upload_strict(data_dict):
"""Synchronously validate a CSV upload and raise ValidationError if invalid.

Called from ``before_resource_create`` and ``before_resource_update`` when
``ckanext.validate.fail_on_invalid_upload`` is ``true``. Operates on the
in-memory file object *before* it is written to disk, so an invalid file
never persists.

Does nothing when:
* strict mode is disabled (the default),
* the resource format is not CSV, or
* no file upload is present in *data_dict* (e.g. a metadata-only update).
"""
if not is_fail_on_invalid_upload_enabled():
return

fmt = (data_dict.get("format") or "").strip().lower()
if fmt != "csv":
return

upload = data_dict.get("upload")
if not upload or isinstance(upload, str) or not hasattr(upload, "read"):
return

try:
content = upload.read()
upload.seek(0)
except Exception:
log.warning(
"validate_csv_upload_strict: could not read upload object — skipping strict validation"
)
return

if not content:
return

try:
with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp:
tmp.write(content)
tmp_path = tmp.name
except Exception as exc:
log.warning("validate_csv_upload_strict: could not write temp file — skipping: %s", exc)
return

try:
with system.use_context(trusted=True):
res = FrictionlessResource("file://" + tmp_path, format="csv")
report = res.validate()
except Exception as exc:
raise toolkit.ValidationError(
{"upload": [toolkit._("System error during validation: {0}").format(str(exc))]}
)
finally:
os.unlink(tmp_path)

if not report.valid:
errors = []
for task in report.tasks:
errors.extend(task.errors)

error_messages = [err.message for err in errors]
if not error_messages:
error_messages = [toolkit._("Structural validation error")]

raise toolkit.ValidationError({"upload": error_messages})
46 changes: 46 additions & 0 deletions ckanext/validate/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_some_action():
"""
import pytest
from ckan.plugins import plugin_loaded
from ckan.plugins import toolkit as tk

from ckanext.validate import resource_hooks
from ckanext.validate.blueprints import resource as validate_resource
Expand Down Expand Up @@ -138,3 +139,48 @@ def test_plugin_delete_hooks_are_noops():

assert plugin.before_resource_delete({}, {"id": "res-6"}, [{"id": "res-6"}]) is None
assert plugin.after_resource_delete({}, [{"id": "res-6"}]) is None


def test_plugin_before_resource_create_delegates_to_strict_validation(monkeypatch):
captured = {}

def fake_validate_strict(data_dict):
captured["data_dict"] = data_dict

monkeypatch.setattr(resource_hooks, "validate_csv_upload_strict", fake_validate_strict)

plugin = ValidatePlugin()
resource = {"id": "res-7", "format": "CSV"}
result = plugin.before_resource_create({}, resource)

assert result == resource
assert captured["data_dict"] is resource


def test_plugin_before_resource_update_delegates_to_strict_validation(monkeypatch):
captured = {}

def fake_validate_strict(data_dict):
captured["data_dict"] = data_dict

monkeypatch.setattr(resource_hooks, "validate_csv_upload_strict", fake_validate_strict)

plugin = ValidatePlugin()
current = {"id": "res-8"}
resource = {"id": "res-8", "format": "CSV"}
result = plugin.before_resource_update({}, current, resource)

assert result == resource
assert captured["data_dict"] is resource


def test_plugin_before_resource_create_propagates_validation_error(monkeypatch):

def raise_error(data_dict):
raise tk.ValidationError({"upload": ["Type error in row 2"]})

monkeypatch.setattr(resource_hooks, "validate_csv_upload_strict", raise_error)

plugin = ValidatePlugin()
with pytest.raises(tk.ValidationError):
plugin.before_resource_create({}, {"format": "CSV", "upload": object()})
139 changes: 139 additions & 0 deletions ckanext/validate/tests/test_resource_hooks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
import io
from pathlib import Path

import pytest
from ckan.plugins import toolkit

from ckanext.validate import jobs
from ckanext.validate import resource_hooks

_INTERNAL_PATCH_FLAG = "_validate_internal_patch"

FIXTURES_DIR = Path(__file__).parent / "files_test"


class FakeUpload:
"""Minimal file-like upload object for testing strict validation."""

def __init__(self, content):
if isinstance(content, str):
content = content.encode("utf-8")
self._buffer = io.BytesIO(content)

def read(self):
return self._buffer.read()

def seek(self, pos):
self._buffer.seek(pos)


def test_mark_resource_as_pending_uses_internal_flag_and_user(monkeypatch):
captured = {}
Expand Down Expand Up @@ -158,3 +181,119 @@ def fake_enqueue_resource_validation_job(resource_id, username=None):

assert result is False
assert called == {"pending": False, "enqueue": False}


# ---------------------------------------------------------------------------
# is_fail_on_invalid_upload_enabled
# ---------------------------------------------------------------------------


@pytest.mark.ckan_config("ckanext.validate.fail_on_invalid_upload", "false")
def test_is_fail_on_invalid_upload_enabled_returns_false_by_default():
assert resource_hooks.is_fail_on_invalid_upload_enabled() is False


@pytest.mark.ckan_config("ckanext.validate.fail_on_invalid_upload", "true")
def test_is_fail_on_invalid_upload_enabled_returns_true_when_configured():
assert resource_hooks.is_fail_on_invalid_upload_enabled() is True


@pytest.mark.ckan_config("ckanext.validate.fail_on_invalid_upload", "false")
def test_is_fail_on_invalid_upload_enabled_returns_false_when_configured():
assert resource_hooks.is_fail_on_invalid_upload_enabled() is False


# ---------------------------------------------------------------------------
# validate_csv_upload_strict — skip conditions (strict mode disabled)
# ---------------------------------------------------------------------------


def test_validate_csv_upload_strict_skips_when_disabled(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: False)
data_dict = {"format": "CSV", "upload": FakeUpload("id,name\n1,Alice\n")}
# Must not raise
resource_hooks.validate_csv_upload_strict(data_dict)


def test_validate_csv_upload_strict_skips_non_csv(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
data_dict = {"format": "XLSX", "upload": FakeUpload("some data")}
resource_hooks.validate_csv_upload_strict(data_dict)


def test_validate_csv_upload_strict_skips_when_no_upload_key(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
data_dict = {"format": "CSV"}
resource_hooks.validate_csv_upload_strict(data_dict)


def test_validate_csv_upload_strict_skips_url_string_upload(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
data_dict = {"format": "CSV", "upload": "https://example.com/data.csv"}
resource_hooks.validate_csv_upload_strict(data_dict)


def test_validate_csv_upload_strict_skips_empty_content(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
data_dict = {"format": "CSV", "upload": FakeUpload(b"")}
resource_hooks.validate_csv_upload_strict(data_dict)


# ---------------------------------------------------------------------------
# validate_csv_upload_strict — strict mode enabled, valid CSV
# ---------------------------------------------------------------------------


def test_validate_csv_upload_strict_valid_csv_does_not_raise(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
content = (FIXTURES_DIR / "valid.csv").read_bytes()
data_dict = {"format": "CSV", "upload": FakeUpload(content)}
resource_hooks.validate_csv_upload_strict(data_dict)


def test_validate_csv_upload_strict_resets_upload_position_after_valid(monkeypatch):
"""Upload must be seeked back to 0 so CKAN can still read it for the actual save."""
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
content = (FIXTURES_DIR / "valid.csv").read_bytes()
upload = FakeUpload(content)
data_dict = {"format": "CSV", "upload": upload}
resource_hooks.validate_csv_upload_strict(data_dict)
assert upload.read() == content


# ---------------------------------------------------------------------------
# validate_csv_upload_strict — strict mode enabled, invalid CSV
# ---------------------------------------------------------------------------


def test_validate_csv_upload_strict_invalid_csv_raises_validation_error(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
content = (FIXTURES_DIR / "bike-errors.csv").read_bytes()
data_dict = {"format": "CSV", "upload": FakeUpload(content)}
with pytest.raises(toolkit.ValidationError) as exc:
resource_hooks.validate_csv_upload_strict(data_dict)
assert "upload" in exc.value.error_dict
assert exc.value.error_dict["upload"]


def test_validate_csv_upload_strict_error_messages_are_strings(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)
content = (FIXTURES_DIR / "bike-errors.csv").read_bytes()
data_dict = {"format": "CSV", "upload": FakeUpload(content)}
with pytest.raises(toolkit.ValidationError) as exc:
resource_hooks.validate_csv_upload_strict(data_dict)
for msg in exc.value.error_dict["upload"]:
assert isinstance(msg, str)


def test_validate_csv_upload_strict_frictionless_exception_raises_validation_error(monkeypatch):
monkeypatch.setattr(resource_hooks, "is_fail_on_invalid_upload_enabled", lambda: True)

def explode(source, format):
raise RuntimeError("frictionless crashed")

monkeypatch.setattr(resource_hooks, "FrictionlessResource", explode)
data_dict = {"format": "CSV", "upload": FakeUpload("id,name\n1,Alice\n")}
with pytest.raises(toolkit.ValidationError) as exc:
resource_hooks.validate_csv_upload_strict(data_dict)
assert "upload" in exc.value.error_dict
Loading