Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 24 additions & 4 deletions cumulusci/tasks/bulkdata/select_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,31 @@

OPTIONAL_DEPENDENCIES_AVAILABLE = True
except ImportError:
OPTIONAL_DEPENDENCIES_AVAILABLE = False

# Process-wide latch: _warn_missing_optional_deps_once() sets this to True
# the first time it runs, so the missing-optional-deps warning is logged at
# most once per process.
_MISSING_OPTIONAL_DEPS_WARNING_EMITTED = False


def _warn_missing_optional_deps_once() -> None:
    """Emit the optional-[select]-deps warning at most once per process.

    The warning previously fired at module import time, so every
    ``extract_dataset`` invocation surfaced it via transitive imports even
    when no similarity strategy was in use (#3886). Defer the emission to
    the code path that actually pays the perf penalty.

    The module-level ``_MISSING_OPTIONAL_DEPS_WARNING_EMITTED`` flag is the
    latch: it is flipped before logging, so later calls return immediately.
    """
    global _MISSING_OPTIONAL_DEPS_WARNING_EMITTED
    if _MISSING_OPTIONAL_DEPS_WARNING_EMITTED:
        return
    _MISSING_OPTIONAL_DEPS_WARNING_EMITTED = True
    # Each sentence appears exactly once; only the final fragment needs to be
    # an f-string because it is the only one that interpolates a value.
    logger.warning(
        "Optional dependencies are missing. "
        "Handling high volumes of records for the 'select' functionality "
        "will be significantly slower, "
        "as optimizations for this feature are currently disabled. "
        "To enable optimized performance, install all required dependencies "
        f"using: {get_cci_upgrade_command()}[select]"
    )


class SelectStrategy(StrEnum):
Expand Down Expand Up @@ -313,6 +331,8 @@ def similarity_post_process(
insert_records = []

if complexity_constant < 1000 or not OPTIONAL_DEPENDENCIES_AVAILABLE:
if complexity_constant >= 1000 and not OPTIONAL_DEPENDENCIES_AVAILABLE:
_warn_missing_optional_deps_once()
select_records, insert_records = levenshtein_post_process(
load_records, query_records, fields, weights, threshold
)
Expand Down
73 changes: 73 additions & 0 deletions cumulusci/tasks/bulkdata/tests/test_select_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import sys

import pytest

from cumulusci.tasks.bulkdata.select_utils import (
Expand Down Expand Up @@ -1042,3 +1045,73 @@ def test_split_and_filter_fields_multiple_unique_lookups():
assert (
select_fields == fields
) # No filtering applied since all components are unique


def test_select_utils_import_does_not_emit_warning_when_optional_deps_missing(
    caplog,
):
    """Regression test for GH #3886.

    Importing ``cumulusci.tasks.bulkdata.select_utils`` must not emit a
    WARNING-level log record at module-load time, even when the optional
    ``[select]`` deps (numpy/pandas/annoy/scikit-learn) are unavailable.
    Two transitive imports pull ``select_utils`` into ``extract.py``, so a
    module-import warning fires on every ``extract_dataset`` invocation
    regardless of whether the user opted into a similarity strategy.

    The test masks the optional packages in ``sys.modules``, re-imports the
    module from scratch while capturing its logger, and then restores every
    piece of import state it touched so later tests see the original module.
    """
    module_name = "cumulusci.tasks.bulkdata.select_utils"
    parent_name, _, attr_name = module_name.rpartition(".")
    blocked_top = {"numpy", "pandas", "annoy", "sklearn"}

    from pydantic.v1 import class_validators as _v1_class_validators

    def _select_utils_validator_refs():
        # Validator references currently registered for select_utils.
        prefix = module_name + "."
        return {ref for ref in _v1_class_validators._FUNCS if ref.startswith(prefix)}

    # Snapshot every already-imported optional package and submodule so the
    # exact objects can be put back afterwards.
    saved_modules = {
        name: mod
        for name, mod in list(sys.modules.items())
        if name.split(".", 1)[0] in blocked_top
    }
    saved_select_utils = sys.modules.pop(module_name, None)
    saved_func_refs = _select_utils_validator_refs()

    try:
        # A ``None`` entry in sys.modules makes ``import <name>`` fail with
        # ImportError, which simulates the optional deps not being installed.
        for name in blocked_top.union(saved_modules):
            sys.modules[name] = None

        # Drop the references recorded by the first import so re-importing
        # the module is not treated as a duplicate validator registration.
        _v1_class_validators._FUNCS -= saved_func_refs

        caplog.clear()
        with caplog.at_level(logging.DEBUG, logger=module_name):
            import cumulusci.tasks.bulkdata.select_utils  # noqa: F401

        warning_records = [
            r
            for r in caplog.records
            if r.levelno >= logging.WARNING and r.name == module_name
        ]
        assert warning_records == [], (
            "Module import emitted WARNING-level log record(s) when optional "
            "[select] deps are missing (#3886): "
            f"{[r.getMessage() for r in warning_records]}"
        )
    finally:
        sys.modules.pop(module_name, None)
        for name in blocked_top:
            sys.modules.pop(name, None)
        sys.modules.update(saved_modules)

        # Forget whatever the throwaway import registered, then reinstate the
        # original references. Without the first step the re-import in the
        # ``else`` branch below would collide with the throwaway module's
        # registrations.
        _v1_class_validators._FUNCS -= _select_utils_validator_refs()
        _v1_class_validators._FUNCS |= saved_func_refs

        if saved_select_utils is not None:
            sys.modules[module_name] = saved_select_utils
            # Importing a submodule also rebinds it as an attribute of its
            # parent package; point that attribute back at the original
            # module so ``from <parent> import select_utils`` and dotted-path
            # patching do not see the deps-missing copy.
            parent_pkg = sys.modules.get(parent_name)
            if parent_pkg is not None:
                setattr(parent_pkg, attr_name, saved_select_utils)
        else:
            import cumulusci.tasks.bulkdata.select_utils  # noqa: F401
Loading