Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 34 additions & 4 deletions collectors/osv/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from apps.taskman.constants import JIRA_AUTH_TOKEN, JIRA_EMAIL
from collectors.constants import SNIPPET_CREATION_ENABLED
from collectors.framework.models import Collector
from collectors.osv.constants import OSV_START_DATE
from collectors.osv.constants import OSV_START_DATE, REFERENCES_THAT_CAN_BE_IGNORED
from collectors.utils import convert_cvss_score_to_impact, handle_urls
from osidb.core import set_user_acls
from osidb.models import Flaw, FlawCVSS, FlawReference, Snippet
Expand All @@ -30,6 +30,13 @@ class OSVCollectorException(Exception):
"""exception for OSV Collector"""


class OSVCollectorIgnoredException(Exception):
    """Signal that an OSV vulnerability was deliberately skipped by the collector."""

    def __init__(self, osv_id):
        # Callers pass only the OSV identifier; the message text is fixed here.
        message = f"OSV vulnerability {osv_id} was ignored."
        super().__init__(message)


class OSVCollector(Collector):
# Snippet creation is disabled for now
snippet_creation_enabled = None
Expand Down Expand Up @@ -130,10 +137,13 @@ def collect(self, osv_id: Union[str, None] = None) -> str:
if osv_id is not None:
# Surface an exception when collecting an individual OSV vulnerability
osv_vuln = self.fetch_osv_vuln_by_id(osv_id)
osv_id, cve_ids, content = self.extract_content(osv_vuln)
try:
osv_id, cve_ids, content = self.extract_content(osv_vuln)
with transaction.atomic():
self.save_snippet_and_flaw(osv_id, cve_ids, content)
except OSVCollectorIgnoredException as exc:
logger.warning(str(exc))
return f"OSV collection for {osv_id} was ignored."
except Exception as exc:
message = f"Failed to save snippet and flaw for {osv_id}. Error: {exc}."
logger.error(message)
Expand All @@ -148,6 +158,9 @@ def collect(self, osv_id: Union[str, None] = None) -> str:
for osv_vuln in self.fetch_osv_vulns_for_ecosystem(ecosystem):
try:
osv_id, cve_ids, content = self.extract_content(osv_vuln)
except OSVCollectorIgnoredException as exc:
logger.warning(str(exc))
continue
except Exception as exc:
logger.error(
f"Failed to parse data from {osv_vuln['id']} vulnerability: {exc}"
Expand Down Expand Up @@ -286,8 +299,11 @@ def get_refs(data: dict) -> list:
return refs

def get_comment_zero(data: dict) -> str:
# https://ossf.github.io/osv-schema/#summary-details-fields
return data.get("details", "")
if not (detail := data.get("details", "")) and self.can_be_ignored(
osv_vuln
):
raise OSVCollectorIgnoredException(osv_vuln["id"])
return detail

def get_title(data: dict) -> str:
# https://ossf.github.io/osv-schema/#summary-details-fields
Expand Down Expand Up @@ -342,3 +358,17 @@ def get_cvss_and_impact(data: dict) -> tuple[list, str]:
}

return osv_id, cve_ids, content

def has_reference_to_ignore(self, osv_vuln: dict) -> bool:
    """
    Tell whether any reference URL of the OSV record contains one of the
    fragments listed in REFERENCES_THAT_CAN_BE_IGNORED (substring match).

    A record without a "references" key is treated as having a single
    empty reference, so the result is False for it.
    """
    return any(
        ignorable in reference.get("url", "")
        for ignorable in REFERENCES_THAT_CAN_BE_IGNORED
        for reference in osv_vuln.get("references", [{}])
    )

def is_withdrawn(self, osv_vuln: dict) -> bool:
    """
    Tell whether the OSV record has been withdrawn.

    The "withdrawn" field, when present, holds a timestamp string rather
    than a boolean, so the value is coerced to honour the declared return
    type. A missing, empty or null field means the record is not withdrawn.
    """
    return bool(osv_vuln.get("withdrawn"))

def can_be_ignored(self, osv_vuln: dict) -> bool:
    """
    Tell whether an advisory can be ignored: it must have been withdrawn
    and must carry a reference matched by has_reference_to_ignore
    (e.g. an echohq.com URL).
    """
    withdrawn = self.is_withdrawn(osv_vuln)
    if not withdrawn:
        # Not withdrawn: the reference check is not evaluated at all
        return withdrawn
    return self.has_reference_to_ignore(osv_vuln)
3 changes: 3 additions & 0 deletions collectors/osv/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
OSV_COLLECTOR_ENABLED = get_env("OSV_COLLECTOR_ENABLED", default="True", is_bool=True)

OSV_START_DATE = get_env_date("OSV_START_DATE", default="2024-07-01")


REFERENCES_THAT_CAN_BE_IGNORED = ["echohq.com"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.32.3
method: GET
uri: https://example.com/v1/vulns/GO-2023-1494
response:
body:
string: '{"schema_version": "1.7.5", "id": "ECHO-7627-a361-b4d3", "published": "2025-11-25T20:28:26.189245Z", "modified": "2026-04-21T08:01:08.171596Z", "withdrawn": "2026-04-21T08:01:08.171596Z", "aliases": ["BIT-django-2025-57833", "CVE-2025-57833", "GHSA-6w2r-r2m5-xq5w", "ECHO-7627-a361-b4d3"], "upstream": ["CVE-2025-57833"], "affected": [{"package": {"name": "django", "ecosystem": "PyPI", "purl": "pkg:pypi/django"}, "ranges": [{"type": "ECOSYSTEM", "events": [{"introduced": "0"}, {"fixed": "5.2.1+echo.1"}]}], "versions": ["1.0.1", "1.0.2", "1.0.3", "1.0.4", "1.1", "1.1.1", "1.1.2", "1.1.3", "1.1.4", "1.10", "1.10.1", "1.10.2", "1.10.3", "1.10.4", "1.10.5", "1.10.6", "1.10.7", "1.10.8", "1.10a1", "1.10b1", "1.10rc1", "1.11", "1.11.1", "1.11.10", "1.11.11", "1.11.12", "1.11.13", "1.11.14", "1.11.15", "1.11.16", "1.11.17", "1.11.18", "1.11.2", "1.11.20", "1.11.21", "1.11.22", "1.11.23", "1.11.24", "1.11.25", "1.11.26", "1.11.27", "1.11.28", "1.11.29", "1.11.3", "1.11.4", "1.11.5", "1.11.6", "1.11.7", "1.11.8", "1.11.9", "1.11a1", "1.11b1", "1.11rc1", "1.2", "1.2.1", "1.2.2", "1.2.3", "1.2.4", "1.2.5", "1.2.6", "1.2.7", "1.3", "1.3.1", "1.3.2", "1.3.3", "1.3.4", "1.3.5", "1.3.6", "1.3.7", "1.4", "1.4.1", "1.4.10", "1.4.11", "1.4.12", "1.4.13", "1.4.14", "1.4.15", "1.4.16", "1.4.17", "1.4.18", "1.4.19", "1.4.2", "1.4.20", "1.4.21", "1.4.22", "1.4.3", "1.4.4", "1.4.5", "1.4.6", "1.4.7", "1.4.8", "1.4.9", "1.5", "1.5.1", "1.5.10", "1.5.11", "1.5.12", "1.5.2", "1.5.3", "1.5.4", "1.5.5", "1.5.6", "1.5.7", "1.5.8", "1.5.9", "1.6", "1.6.1", "1.6.10", "1.6.11", "1.6.2", "1.6.3", "1.6.4", "1.6.5", "1.6.6", "1.6.7", "1.6.8", "1.6.9", "1.7", "1.7.1", "1.7.10", "1.7.11", "1.7.2", "1.7.3", "1.7.4", "1.7.5", "1.7.6", "1.7.7", "1.7.8", "1.7.9", "1.8", "1.8.1", "1.8.10", "1.8.11", "1.8.12", "1.8.13", "1.8.14", "1.8.15", "1.8.16", "1.8.17", "1.8.18", "1.8.19", "1.8.2", "1.8.3", "1.8.4", "1.8.5", "1.8.6", "1.8.7", "1.8.8", "1.8.9", "1.8a1", "1.8b1", "1.8b2", "1.8c1", "1.9", "1.9.1", 
"1.9.10", "1.9.11", "1.9.12", "1.9.13", "1.9.2", "1.9.3", "1.9.4", "1.9.5", "1.9.6", "1.9.7", "1.9.8", "1.9.9", "1.9a1", "1.9b1", "1.9rc1", "1.9rc2", "2.0", "2.0.1", "2.0.10", "2.0.12", "2.0.13", "2.0.2", "2.0.3", "2.0.4", "2.0.5", "2.0.6", "2.0.7", "2.0.8", "2.0.9", "2.0a1", "2.0b1", "2.0rc1", "2.1", "2.1.1", "2.1.10", "2.1.11", "2.1.12", "2.1.13", "2.1.14", "2.1.15", "2.1.2", "2.1.3", "2.1.4", "2.1.5", "2.1.7", "2.1.8", "2.1.9", "2.1a1", "2.1b1", "2.1rc1", "2.2", "2.2.1", "2.2.10", "2.2.11", "2.2.12", "2.2.13", "2.2.14", "2.2.15", "2.2.16", "2.2.17", "2.2.18", "2.2.19", "2.2.2", "2.2.20", "2.2.21", "2.2.22", "2.2.23", "2.2.24", "2.2.25", "2.2.26", "2.2.27", "2.2.28", "2.2.3", "2.2.4", "2.2.5", "2.2.6", "2.2.7", "2.2.8", "2.2.9", "2.2a1", "2.2b1", "2.2rc1", "3.0", "3.0.1", "3.0.10", "3.0.11", "3.0.12", "3.0.13", "3.0.14", "3.0.2", "3.0.3", "3.0.4", "3.0.5", "3.0.6", "3.0.7", "3.0.8", "3.0.9", "3.0a1", "3.0b1", "3.0rc1", "3.1", "3.1.1", "3.1.10", "3.1.11", "3.1.12", "3.1.13", "3.1.14", "3.1.2", "3.1.3", "3.1.4", "3.1.5", "3.1.6", "3.1.7", "3.1.8", "3.1.9", "3.1a1", "3.1b1", "3.1rc1", "3.2", "3.2.1", "3.2.10", "3.2.11", "3.2.12", "3.2.13", "3.2.14", "3.2.15", "3.2.16", "3.2.17", "3.2.18", "3.2.19", "3.2.2", "3.2.20", "3.2.21", "3.2.22", "3.2.23", "3.2.24", "3.2.25", "3.2.3", "3.2.4", "3.2.5", "3.2.6", "3.2.7", "3.2.8", "3.2.9", "3.2a1", "3.2b1", "3.2rc1", "4.0", "4.0.1", "4.0.10", "4.0.2", "4.0.3", "4.0.4", "4.0.5", "4.0.6", "4.0.7", "4.0.8", "4.0.9", "4.0a1", "4.0b1", "4.0rc1", "4.1", "4.1.1", "4.1.10", "4.1.11", "4.1.12", "4.1.13", "4.1.2", "4.1.3", "4.1.4", "4.1.5", "4.1.6", "4.1.7", "4.1.8", "4.1.9", "4.1a1", "4.1b1", "4.1rc1", "4.2", "4.2.1", "4.2.10", "4.2.11", "4.2.12", "4.2.13", "4.2.14", "4.2.15", "4.2.16", "4.2.17", "4.2.18", "4.2.19", "4.2.2", "4.2.20", "4.2.21", "4.2.22", "4.2.23", "4.2.24", "4.2.25", "4.2.26", "4.2.27", "4.2.28", "4.2.29", "4.2.3", "4.2.30", "4.2.4", "4.2.5", "4.2.6", "4.2.7", "4.2.8", "4.2.9", "4.2a1", "4.2b1", "4.2rc1", "5.0", 
"5.0.1", "5.0.10", "5.0.11", "5.0.12", "5.0.13", "5.0.14", "5.0.2", "5.0.3", "5.0.4", "5.0.5", "5.0.6", "5.0.7", "5.0.8", "5.0.9", "5.0a1", "5.0b1", "5.0rc1", "5.1", "5.1.1", "5.1.10", "5.1.11", "5.1.12", "5.1.13", "5.1.14", "5.1.15", "5.1.2", "5.1.3", "5.1.4", "5.1.5", "5.1.6", "5.1.7", "5.1.8", "5.1.9", "5.1a1", "5.1b1", "5.1rc1", "5.2", "5.2.1", "5.2a1", "5.2b1", "5.2rc1"], "database_specific": {"source": "https://advisory.echohq.com/osv/ECHO-7627-a361-b4d3.json"}}], "references": [{"type": "WEB", "url": "https://advisory.echohq.com/cve/CVE-2025-57833"}, {"type": "WEB", "url": "https://github.com/advisories/GHSA-6w2r-r2m5-xq5w"}]}'
headers:
Content-Length:
- '1007'
Date:
- Tue, 22 Oct 2024 18:28:22 GMT
Server:
- Google Frontend
X-Cloud-Trace-Context:
- a6455da084d2906f226ac84d4b3f3518
alt-svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
content-type:
- application/json
grpc-accept-encoding:
- identity, deflate, gzip
grpc-message:
- ''
grpc-status:
- '0'
x-envoy-decorator-operation:
- ingress GetVulnById
status:
code: 200
message: OK
version: 1
15 changes: 15 additions & 0 deletions collectors/osv/tests/test_collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,21 @@ def test_ignore_osv_record_historical(self):
assert Snippet.objects.count() == 0
assert Flaw.objects.count() == 0

@pytest.mark.vcr_override_domain(False)
@pytest.mark.vcr
@pytest.mark.default_cassette("osv_record_without_details.yaml")
def test_ignore_osv_record_without_details(self):
    """
    Test that a withdrawn OSV record without details is ignored,
    i.e. collecting it creates neither a snippet nor a flaw.
    """
    collector = OSVCollector()
    collector.snippet_creation_enabled = True
    collector.snippet_creation_start_date = datetime(
        2024, 1, 1, tzinfo=timezone.utc
    )

    collector.collect(osv_id="GO-2023-1494")

    # nothing may be stored for an ignored record
    assert Snippet.objects.count() == 0
    assert Flaw.objects.count() == 0

@pytest.mark.enable_signals
@pytest.mark.vcr
def test_collect_osv_record_with_cvss(self):
Expand Down
67 changes: 49 additions & 18 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,55 @@ def filter_response(response):
return response


def _vcr_should_override_response_domain(request):
    """
    Decide whether http(s) hosts in recorded response bodies get scrubbed to
    example.com. The answer is True unless the test opts out.

    Request URIs are always scrubbed so cassettes stay stable and playback still
    matches (VCR applies before_record_request when matching too).

    A test opts out with @pytest.mark.vcr_override_domain(False), which keeps the
    original domains in recorded response bodies only (e.g. echohq.com URLs that
    tests assert on). The flag may be given positionally or as the
    ``override_domain`` keyword; the keyword wins when both are present.
    """
    marker = request.node.get_closest_marker("vcr_override_domain")
    if marker is None:
        return True

    try:
        flag = marker.kwargs["override_domain"]
    except KeyError:
        # no keyword given: fall back to the first positional argument, if any
        flag = marker.args[0] if marker.args else True
    return bool(flag)


def remove_host_request(request):
    """
    Rewrite the part of the request URI matched by base_url_pattern to
    https://example.com, so cassettes stay stable and playback matching is
    consistent. The request is modified in place and returned.
    """
    normalized_uri = re.sub(base_url_pattern, "https://example.com", request.uri)
    request.uri = normalized_uri
    return request


def remove_host_response(response):
body_string = re.sub(
base_url_pattern,
"https://example.com",
response["body"]["string"].decode("utf-8"),
)
response["body"]["string"] = body_string.encode("utf-8")
def make_remove_host_response(override_domain: bool):
def remove_host_response(response):
if override_domain:
body_string = re.sub(
base_url_pattern,
"https://example.com",
response["body"]["string"].decode("utf-8"),
)
response["body"]["string"] = body_string.encode("utf-8")

# redirected requests need Location header
original_locations = response["headers"].get("Location", [])
if original_locations:
locations = []
for location in original_locations:
locations.append(re.sub(base_url_pattern, "https://example.com", location))
response["headers"]["Location"] = locations
# redirected requests need Location header
original_locations = response["headers"].get("Location", [])
if original_locations:
locations = []
for location in original_locations:
locations.append(
re.sub(base_url_pattern, "https://example.com", location)
)
response["headers"]["Location"] = locations

return response
return response

return remove_host_response


@pytest.fixture(scope="function")
Expand All @@ -130,15 +157,19 @@ def refresh_func():
return refresh_func


@pytest.fixture(scope="session")
def vcr_config():
@pytest.fixture(scope="function")
def vcr_config(request):
override_response_domain = _vcr_should_override_response_domain(request)
return {
"filter_headers": [
"Authorization",
"Cookie",
],
"before_record_request": [remove_host_request],
"before_record_response": [remove_host_response, filter_response],
"before_record_response": [
make_remove_host_response(override_response_domain),
filter_response,
],
"filter_query_parameters": [
"Bugzilla_api_key",
],
Expand Down
3 changes: 3 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Affect(s) can be automatically created and assigned to Flaw(s) for
specific products (OSIDB-4878)

### Changed
- The OSV collector now ignores withdrawn advisories that have no details and reference echohq.com.

## [5.9.0] - 2026-04-09
### Fixed
- Fix invalid `in` field in kerberos OpenAPI security scheme (OSIDB-1590)
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ markers =
enable_signals: enables django signals to run.
enable_rls: enables row-level-security in the database during testing.
queryset: marks a database query count test.
vcr_override_domain: pass False to keep original domains in recorded response bodies only; request URIs are always normalized (default True).

filterwarnings =
error
Expand Down
Loading