From 92728649e888730b472ac9b1cddccc916d6a1b80 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 20 Mar 2026 13:10:43 +0200
Subject: [PATCH 001/100] Update OSF to use latest djelme (django-elasticsearch-metrics)

---
 .docker-compose.env                      |  1 +
 api/base/settings/defaults.py            | 16 ++++--
 docker-compose.yml                       | 14 +++++
 osf/metrics/counted_usage.py             |  2 +-
 osf/metrics/preprint_metrics.py          |  2 +-
 osf/metrics/registry_metrics.py          |  2 +-
 osf/metrics/reports.py                   |  2 +-
 osf_tests/metrics/test_daily_report.py   |  2 +-
 osf_tests/metrics/test_metric_mixin.py   |  2 +-
 osf_tests/metrics/test_monthly_report.py |  2 +-
 poetry.lock                              | 67 ++++++++++++++++++------
 pyproject.toml                           |  6 ++-
 website/settings/defaults.py             |  1 +
 13 files changed, 90 insertions(+), 29 deletions(-)

diff --git a/.docker-compose.env b/.docker-compose.env
index 9cb7a59e274..449c9747adf 100644
--- a/.docker-compose.env
+++ b/.docker-compose.env
@@ -7,6 +7,7 @@ INTERNAL_DOMAIN=http://192.168.168.167:5000/
 API_DOMAIN=http://localhost:8000/
 ELASTIC_URI=192.168.168.167:9200
 ELASTIC6_URI=192.168.168.167:9201
+ELASTIC8_URI=http://192.168.168.167:9202
 OSF_DB_HOST=192.168.168.167
 DB_HOST=192.168.168.167
 REDIS_HOST=redis://192.168.168.167:6379
diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index efddf2484b8..6fd9b2a4d70 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -316,10 +316,18 @@
 HASHIDS_SALT = 'pinkhimalayan'
 
 # django-elasticsearch-metrics
-ELASTICSEARCH_DSL = {
-    'default': {
-        'hosts': osf_settings.ELASTIC6_URI,
-        'retry_on_timeout': True,
+DJELME_AUTOSETUP = True
+DJELME_BACKENDS = {
+    "osfmetrics_es6": {
+        "elasticsearch_metrics.imps.elastic6": {
+            "hosts": osf_settings.ELASTIC6_URI,
+            "retry_on_timeout": True,
+        },
+    },
+    "osfmetrics_es8": {
+        "elasticsearch_metrics.imps.elastic8": {
+            "hosts": osf_settings.ELASTIC8_URI,
+        },
     },
 }
 # Store yearly indices for time-series metrics
diff --git a/docker-compose.yml b/docker-compose.yml
index f00b589f7e0..f26c3617b67 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,6 +13,8 @@ volumes:
     external: false
   elasticsearch6_data_vol:
     external: false
+  elasticsearch8_data_vol:
+    external: false
   rabbitmq_vol:
     external: false
   ember_osf_web_dist_vol:
@@ -76,6 +78,18 @@ services:
       - elasticsearch6_data_vol:/usr/share/elasticsearch/data
     stdin_open: true
 
+  elasticsearch8:
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11
+    platform: linux/arm64
+    environment:
+      - xpack.security.enabled=false
+      - discovery.type=single-node
+    ports:
+      - 9202:9200
+    volumes:
+      - elasticsearch8_data_vol:/usr/share/elasticsearch/data
+    stdin_open: true
+
   postgres:
     image: postgres:15.4
     command:
diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py
index 39b3b74129b..41ea012fda5 100644
--- a/osf/metrics/counted_usage.py
+++ b/osf/metrics/counted_usage.py
@@ -4,7 +4,7 @@
 from urllib.parse import urlsplit
 
 from elasticsearch6_dsl import InnerDoc, analyzer, tokenizer
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 from elasticsearch_metrics.signals import pre_save
 from django.dispatch import receiver
 import pytz
diff --git a/osf/metrics/preprint_metrics.py b/osf/metrics/preprint_metrics.py
index 9d02ec191a2..c45cec4f24a 100644
--- a/osf/metrics/preprint_metrics.py
+++ b/osf/metrics/preprint_metrics.py
@@ -1,5 +1,5 @@
 from elasticsearch.exceptions import NotFoundError
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 
 from .metric_mixin import MetricMixin
 
diff --git a/osf/metrics/registry_metrics.py b/osf/metrics/registry_metrics.py
index 475dca28673..9c779fe8c0b 100644
--- a/osf/metrics/registry_metrics.py
+++ b/osf/metrics/registry_metrics.py
@@ -1,4 +1,4 @@
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 
 from osf.utils.workflows import RegistrationModerationTriggers, RegistrationModerationStates
 from .metric_mixin import MetricMixin
diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py
index ffbcfb4c9b8..9d71ea7e8c2 100644
--- a/osf/metrics/reports.py
+++ b/osf/metrics/reports.py
@@ -4,7 +4,7 @@
 
 from django.dispatch import receiver
 from elasticsearch6_dsl import InnerDoc
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 from elasticsearch_metrics.signals import pre_save as metrics_pre_save
 
 from osf.metrics.utils import stable_key, YearMonth
diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py
index 46375184f95..9301cdb114f 100644
--- a/osf_tests/metrics/test_daily_report.py
+++ b/osf_tests/metrics/test_daily_report.py
@@ -2,7 +2,7 @@
 from unittest import mock
 
 import pytest
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 
 from osf.metrics.reports import DailyReport, ReportInvalid
 
diff --git a/osf_tests/metrics/test_metric_mixin.py b/osf_tests/metrics/test_metric_mixin.py
index 4a2c32f7e71..ec9b2d302de 100644
--- a/osf_tests/metrics/test_metric_mixin.py
+++ b/osf_tests/metrics/test_metric_mixin.py
@@ -1,6 +1,6 @@
 from unittest import mock
 import pytest
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 
 from osf.metrics.metric_mixin import MetricMixin
 from osf.models import OSFUser
diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py
index 3c841e6555c..cc8c4137cb2 100644
--- a/osf_tests/metrics/test_monthly_report.py
+++ b/osf_tests/metrics/test_monthly_report.py
@@ -2,7 +2,7 @@
 from unittest import mock
 
 import pytest
-from elasticsearch_metrics import metrics
+import elasticsearch_metrics.imps.elastic6 as metrics
 
 from osf.metrics.reports import MonthlyReport, ReportInvalid, PublicItemUsageReport
 from osf.metrics.utils import YearMonth
diff --git a/poetry.lock b/poetry.lock
index 83ca13f7a00..062070e3f1e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1085,27 +1085,24 @@ Django = ">=2.0"
 
 [[package]]
 name = "django-elasticsearch-metrics"
-version = "2022.0.6"
+version = "2026.0.0"
 description = "Django app for storing time-series metrics in Elasticsearch."
 optional = false
-python-versions = "*"
+python-versions = ">=3.10,<4"
 groups = ["main"]
 files = []
 develop = false
 
-[package.dependencies]
-elasticsearch6-dsl = ">=6.3.0,<7.0.0"
-
 [package.extras]
-dev = ["factory-boy (==2.11.1)", "flake8 (==5.0.4)", "flake8-bugbear (==18.8.0)", "konch (>=3.0.0)", "mock", "pre-commit (==2.17.0)", "pytest", "pytest-django (==3.10.0)", "tox"]
-lint = ["flake8 (==5.0.4)", "flake8-bugbear (==18.8.0)", "pre-commit (==2.17.0)"]
-tests = ["factory-boy (==2.11.1)", "mock", "pytest", "pytest-django (==3.10.0)"]
+anydjango = ["django"]
+elastic6 = ["elasticsearch6-dsl (>=6.3.0,<7.0.0)"]
+elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "f5b9312914154e213aa01731e934c593e3434269"
-resolved_reference = "f5b9312914154e213aa01731e934c593e3434269"
+reference = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"
+resolved_reference = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"
 
 [[package]]
 name = "django-extensions"
@@ -1361,14 +1358,14 @@ stone = ">=2"
 
 [[package]]
 name = "elastic-transport"
-version = "8.13.0"
+version = "8.17.1"
 description = "Transport classes and utilities shared among Python Elastic client libraries"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "elastic-transport-8.13.0.tar.gz", hash = "sha256:2410ec1ff51221e8b3a01c0afa9f0d0498e1386a269283801f5c12f98e42dc45"},
-    {file = "elastic_transport-8.13.0-py3-none-any.whl", hash = "sha256:aec890afdddd057762b27ff3553b0be8fa4673ec1a4fd922dfbd00325874bb3d"},
+    {file = "elastic_transport-8.17.1-py3-none-any.whl", hash = "sha256:192718f498f1d10c5e9aa8b9cf32aed405e469a7f0e9d6a8923431dbb2c59fb8"},
+    {file = "elastic_transport-8.17.1.tar.gz", hash = "sha256:5edef32ac864dca8e2f0a613ef63491ee8d6b8cfb52881fa7313ba9290cac6d2"},
 ]
 
 [package.dependencies]
@@ -1376,7 +1373,7 @@ certifi = "*"
 urllib3 = ">=1.26.2,<3"
 
 [package.extras]
-develop = ["aiohttp", "furo", "httpx", "mock", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"]
+develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"]
 
 [[package]]
 name = "elasticsearch"
@@ -1471,6 +1468,32 @@ six = "*"
 [package.extras]
 develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "sphinx", "sphinx-rtd-theme"]
 
+[[package]]
+name = "elasticsearch8"
+version = "8.19.3"
+description = "Python client for Elasticsearch"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "elasticsearch8-8.19.3-py3-none-any.whl", hash = "sha256:4b52e59e68aea6f59bf37c28f6f4512333302dd8a52e26c17d0f10c076d833a1"},
+    {file = "elasticsearch8-8.19.3.tar.gz", hash = "sha256:7effe95b360241b6d56ef68219037a90ad0f56723614db54bbe57d33058402f4"},
+]
+
+[package.dependencies]
+elastic-transport = ">=8.15.1,<9"
+python-dateutil = "*"
+typing-extensions = "*"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "mypy", "nox", "numpy", "orjson", "pandas", "pyarrow ; python_version < \"3.14\"", "pyright", "pytest", "pytest-asyncio", "pytest-cov", "pytest-mock", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "tqdm", "twine", "types-python-dateutil", "types-tqdm", "unasync"]
+docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"]
+orjson = ["orjson (>=3)"]
+pyarrow = ["pyarrow (>=1)"]
+requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"]
+vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"]
+
 [[package]]
 name = "email-validator"
 version = "2.1.1"
@@ -4412,6 +4435,18 @@ files = [
     {file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"},
 ]
 
+[[package]]
+name = "typing-extensions"
+version = "4.15.0"
+description = "Backported and Experimental Type Hints for Python 3.9+"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"},
+    {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"},
+]
+
 [[package]]
 name = "tzdata"
 version = "2024.1"
@@ -4715,4 +4750,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "2bc7e95f03d05e8b3335514e887b590acdab5cb2a44fc47bde870bdf8e465bf2"
+content-hash = "2175d011bdf45da06721a3cf70377730725e59a52f6ac4357152c38e4427d1e3"
diff --git a/pyproject.toml b/pyproject.toml
index b1646584209..700b836895b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,9 @@ furl = "2.1.3"
 elasticsearch2 = "2.5.1"
 elasticsearch = "6.8.2" # max version to support elasticsearch6
 elasticsearch-dsl = "6.4.0" # max version to support elasticsearch6
-elastic-transport = "8.13.0"
+elasticsearch6-dsl = "6.4.0"
+elasticsearch8 = "8.19.3"
+elastic-transport = "8.17.1"
 google-api-python-client = "2.123.0"
 google-auth = "2.29.0"
 Babel = "2.14.0"
@@ -90,7 +92,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "f5b9312914154e213aa01731e934c593e3434269"} # branch is feature/pin-esdsl
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"} # branch is feature/pin-esdsl
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index 2d3dcecba3b..d0ae58dc863 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -113,6 +113,7 @@ def parent_dir(path):
 SEARCH_ENGINE = 'elastic'  # Can be 'elastic', or None
 ELASTIC_URI = '127.0.0.1:9200'
 ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201')
+ELASTIC8_URI = os.environ.get('ELASTIC8_URI', '127.0.0.1:9202')
 ELASTIC_TIMEOUT = 10
 ELASTIC_INDEX = 'website'
 ELASTIC_KWARGS = {

From 590f7a2c405d1c30dc012d0d436f843e8c1afa19 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 23 Mar 2026 15:04:59 +0200
Subject: [PATCH 002/100] fix failing tests

---
 api/base/settings/defaults.py | 14 +++++-----
 conftest.py                   | 50 ++++++++---------------------------
 2 files changed, 18 insertions(+), 46 deletions(-)

diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index 6fd9b2a4d70..816586ffcfb 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -318,15 +318,15 @@
 # django-elasticsearch-metrics
 DJELME_AUTOSETUP = True
 DJELME_BACKENDS = {
-    "osfmetrics_es6": {
-        "elasticsearch_metrics.imps.elastic6": {
-            "hosts": osf_settings.ELASTIC6_URI,
-            "retry_on_timeout": True,
+    'osfmetrics_es6': {
+        'elasticsearch_metrics.imps.elastic6': {
+            'hosts': osf_settings.ELASTIC6_URI,
+            'retry_on_timeout': True,
         },
     },
-    "osfmetrics_es8": {
-        "elasticsearch_metrics.imps.elastic8": {
-            "hosts": osf_settings.ELASTIC8_URI,
+    'osfmetrics_es8': {
+        'elasticsearch_metrics.imps.elastic8': {
+            'hosts': osf_settings.ELASTIC8_URI,
         },
     },
 }
diff --git a/conftest.py b/conftest.py
index 9494e3d296e..59b8def87c9 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,14 +1,11 @@
-import contextlib
 from unittest import mock
 import logging
 import os
 import re
 
-from django.core.management import call_command
 from django.db import transaction
-from elasticsearch import exceptions as es_exceptions
 from elasticsearch_dsl.connections import connections
-from elasticsearch_metrics.registry import registry as es_metrics_registry
+from elasticsearch_metrics.tests._test_util import RealElasticTestCase
 from faker import Factory
 import pytest
 import responses
@@ -138,47 +135,22 @@ def es6_client(setup_connections):
 
 
 @pytest.fixture(scope='function', autouse=True)
-def _es_metrics_marker(request, worker_id):
+def _es_metrics_marker(request):
     """Clear out all indices and index templates before and after
     tests marked with `es_metrics`.
     """
     marker = request.node.get_closest_marker('es_metrics')
-    if marker:
-        es6_client = request.getfixturevalue('es6_client')
-        _temp_prefix = 'temp_metrics_'
-        _temp_wildcard = f'{_temp_prefix}-{worker_id}*'
-
-        def _teardown_es_temps():
-            es6_client.indices.delete(index=_temp_wildcard)
-            try:
-                es6_client.indices.delete_template(_temp_wildcard)
-            except es_exceptions.NotFoundError:
-                pass
-
-        @contextlib.contextmanager
-        def _mock_metric_names():
-            with contextlib.ExitStack() as _exit:
-                for _metric_class in es_metrics_registry.get_metrics():
-                    _exit.enter_context(mock.patch.object(
-                        _metric_class,
-                        '_template_name',  # also used to construct index names
-                        f'{_temp_prefix}-{worker_id}{_metric_class._template_name}',
-                    ))
-                    _exit.enter_context(mock.patch.object(
-                        _metric_class,
-                        '_template',  # a wildcard string for indexes and templates
-                        f'{_temp_prefix}-{worker_id}{_metric_class._template}',
-                    ))
-                yield
-
-        _teardown_es_temps()
-        with _mock_metric_names():
-            call_command('sync_metrics')
-            yield
-        _teardown_es_temps()
-    else:
+
+    if not marker:
         yield
+        return
+
+    es6_test_case = RealElasticTestCase()
+    es6_test_case.setup_backends()
+
+    yield
 
+    es6_test_case.teardown_backends()
 
 @pytest.fixture
 def mock_share_responses():

From 6ebdd8eb8d195f5727ca07ef1bf8ebc95ed8b045 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 23 Mar 2026 15:26:33 +0200
Subject: [PATCH 003/100] fix poetry issue: bump django-elasticsearch-metrics to 2026.0.3

---
 poetry.lock    | 8 ++++----
 pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 062070e3f1e..fe2f9cc9721 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1085,7 +1085,7 @@ Django = ">=2.0"
 
 [[package]]
 name = "django-elasticsearch-metrics"
-version = "2026.0.0"
+version = "2026.0.3"
 description = "Django app for storing time-series metrics in Elasticsearch."
 optional = false
 python-versions = ">=3.10,<4"
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"
-resolved_reference = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"
+reference = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"
+resolved_reference = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"
 
 [[package]]
 name = "django-extensions"
@@ -4750,4 +4750,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "2175d011bdf45da06721a3cf70377730725e59a52f6ac4357152c38e4427d1e3"
+content-hash = "68eafe36dddc2e9380dc164244e54830ed593d23af348df72f776c40b102d99c"
diff --git a/pyproject.toml b/pyproject.toml
index 700b836895b..48c5e8dd0a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,7 +92,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "36fc0bbe001ee792f4ab5723eaeec295b8a20eb2"} # branch is feature/pin-esdsl
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"} # branch is feature/pin-esdsl
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From fbe2a0813b86f96687c0434ed2693e9299ac6676 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 23 Mar 2026 16:50:46 +0200
Subject: [PATCH 004/100] add osfmetrics_es6 connection to es_metrics test fixture

---
 conftest.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/conftest.py b/conftest.py
index 59b8def87c9..a0edc40b20a 100644
--- a/conftest.py
+++ b/conftest.py
@@ -5,6 +5,7 @@
 
 from django.db import transaction
 from elasticsearch_dsl.connections import connections
+from website import settings as osf_settings
 from elasticsearch_metrics.tests._test_util import RealElasticTestCase
 from faker import Factory
 import pytest
@@ -145,6 +146,11 @@ def _es_metrics_marker(request):
         yield
         return
 
+    connections.create_connection(
+        alias='osfmetrics_es6',
+        hosts=osf_settings.ELASTIC6_URI,
+    )
+
     es6_test_case = RealElasticTestCase()
     es6_test_case.setup_backends()
 

From ea78a15f04217d14476a19ad7ba46bbab04e2d44 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Tue, 24 Mar 2026 16:30:34 +0200
Subject: [PATCH 005/100] remove connection, add proper setUp and tearDown

---
 conftest.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/conftest.py b/conftest.py
index a0edc40b20a..d08c7b1fdff 100644
--- a/conftest.py
+++ b/conftest.py
@@ -146,17 +146,13 @@ def _es_metrics_marker(request):
         yield
         return
 
-    connections.create_connection(
-        alias='osfmetrics_es6',
-        hosts=osf_settings.ELASTIC6_URI,
-    )
-
-    es6_test_case = RealElasticTestCase()
-    es6_test_case.setup_backends()
-
-    yield
-
-    es6_test_case.teardown_backends()
+    class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True): ...
+    es6_test_case = _Es6TestCase()
+    es6_test_case.setUp()
+    try:
+        yield
+    finally:
+        es6_test_case.tearDown()
 
 @pytest.fixture
 def mock_share_responses():

From 0efd0b13806ec84da8b77af5c294e7f5af966883 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 25 Mar 2026 13:52:16 +0200
Subject: [PATCH 006/100] remove elasticsearch and elasticsearch-dsl

---
 addons/base/views.py                          |  2 +-
 api/metrics/views.py                          |  2 +-
 api_tests/metrics/test_preprint_metrics.py    |  2 +-
 conftest.py                                   |  5 +++
 .../commands/monthly_reporters_go.py          |  2 +-
 osf/metrics/metric_mixin.py                   |  2 +-
 osf/metrics/preprint_metrics.py               |  2 +-
 poetry.lock                                   | 41 +------------------
 pyproject.toml                                |  3 +-
 9 files changed, 13 insertions(+), 48 deletions(-)

diff --git a/addons/base/views.py b/addons/base/views.py
index 5ff3d6e7093..ebcd662966b 100644
--- a/addons/base/views.py
+++ b/addons/base/views.py
@@ -14,7 +14,7 @@
 import waffle
 from django.db import transaction
 from django.contrib.contenttypes.models import ContentType
-from elasticsearch import exceptions as es_exceptions
+from elasticsearch6 import exceptions as es_exceptions
 from rest_framework import status as http_status
 
 from api.caching.tasks import update_storage_usage_with_size
diff --git a/api/metrics/views.py b/api/metrics/views.py
index daaa684d13a..976837ec220 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -6,7 +6,7 @@
 from django.http import JsonResponse, HttpResponse, Http404
 from django.utils import timezone
 
-from elasticsearch.exceptions import NotFoundError, RequestError
+from elasticsearch6.exceptions import NotFoundError, RequestError
 from elasticsearch_dsl.connections import get_connection
 
 from framework.auth.oauth_scopes import CoreScopes
diff --git a/api_tests/metrics/test_preprint_metrics.py b/api_tests/metrics/test_preprint_metrics.py
index 1bde8719b75..cd9b8041c2d 100644
--- a/api_tests/metrics/test_preprint_metrics.py
+++ b/api_tests/metrics/test_preprint_metrics.py
@@ -8,7 +8,7 @@
 
 from django.utils import timezone
 from waffle.testutils import override_switch
-from elasticsearch.exceptions import RequestError
+from elasticsearch6.exceptions import RequestError
 
 from osf import features
 from api.base.settings import API_PRIVATE_BASE as API_BASE
diff --git a/conftest.py b/conftest.py
index d08c7b1fdff..6a6be40a7d9 100644
--- a/conftest.py
+++ b/conftest.py
@@ -146,6 +146,11 @@ def _es_metrics_marker(request):
         yield
         return
 
+    connections.create_connection(
+        alias='osfmetrics_es6',
+        hosts=osf_settings.ELASTIC6_URI,
+    )
+
     class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True): ...
     es6_test_case = _Es6TestCase()
     es6_test_case.setUp()
diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py
index 83ed5f6d985..218b45da1df 100644
--- a/osf/management/commands/monthly_reporters_go.py
+++ b/osf/management/commands/monthly_reporters_go.py
@@ -3,7 +3,7 @@
 
 from django.core.management.base import BaseCommand
 from django.db import OperationalError as DjangoOperationalError
-from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
+from elasticsearch6.exceptions import ConnectionError as ElasticConnectionError
 from psycopg2 import OperationalError as PostgresOperationalError
 
 from framework.celery_tasks import app as celery_app
diff --git a/osf/metrics/metric_mixin.py b/osf/metrics/metric_mixin.py
index 724ab1958da..df87d5123b1 100644
--- a/osf/metrics/metric_mixin.py
+++ b/osf/metrics/metric_mixin.py
@@ -2,7 +2,7 @@
 
 from django.db import models
 from django.utils import timezone
-from elasticsearch.exceptions import NotFoundError
+from elasticsearch6.exceptions import NotFoundError
 import pytz
 
 
diff --git a/osf/metrics/preprint_metrics.py b/osf/metrics/preprint_metrics.py
index c45cec4f24a..d284d80827e 100644
--- a/osf/metrics/preprint_metrics.py
+++ b/osf/metrics/preprint_metrics.py
@@ -1,4 +1,4 @@
-from elasticsearch.exceptions import NotFoundError
+from elasticsearch6.exceptions import NotFoundError
 import elasticsearch_metrics.imps.elastic6 as metrics
 
 from .metric_mixin import MetricMixin
diff --git a/poetry.lock b/poetry.lock
index fe2f9cc9721..530b6252e18 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1375,45 +1375,6 @@ urllib3 = ">=1.26.2,<3"
 [package.extras]
 develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"]
 
-[[package]]
-name = "elasticsearch"
-version = "6.8.2"
-description = "Python client for Elasticsearch"
-optional = false
-python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <4"
-groups = ["main"]
-files = [
-    {file = "elasticsearch-6.8.2-py2.py3-none-any.whl", hash = "sha256:1aedf00b73f5d1e77cb4df70fec58f2efb664be4ce2686374239aa6c0373c65c"},
-    {file = "elasticsearch-6.8.2.tar.gz", hash = "sha256:c3a560bb83e4981b5a5c82080d2ceb99686d33692ef53365656129478aa5ddb2"},
-]
-
-[package.dependencies]
-urllib3 = ">=1.21.1"
-
-[package.extras]
-develop = ["coverage", "mock", "nose", "nosexcover", "numpy", "pandas", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx (<1.7)", "sphinx-rtd-theme"]
-requests = ["requests (>=2.4.0,<3.0.0)"]
-
-[[package]]
-name = "elasticsearch-dsl"
-version = "6.4.0"
-description = "Python client for Elasticsearch"
-optional = false
-python-versions = "*"
-groups = ["main"]
-files = [
-    {file = "elasticsearch-dsl-6.4.0.tar.gz", hash = "sha256:26416f4dd46ceca43d62ef74970d9de4bdd6f4b0f163316f0b432c9e61a08bec"},
-    {file = "elasticsearch_dsl-6.4.0-py2.py3-none-any.whl", hash = "sha256:f60aea7fd756ac1fbe7ce114bbf4949aefbf495dfe8896640e787c67344f12f6"},
-]
-
-[package.dependencies]
-elasticsearch = ">=6.0.0,<7.0.0"
-python-dateutil = "*"
-six = "*"
-
-[package.extras]
-develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "sphinx", "sphinx-rtd-theme"]
-
 [[package]]
 name = "elasticsearch2"
 version = "2.5.1"
@@ -4750,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "68eafe36dddc2e9380dc164244e54830ed593d23af348df72f776c40b102d99c"
+content-hash = "755718b3333d5fe7983ac875532429e80eb2d45684002ae9212bc96a7800d014"
diff --git a/pyproject.toml b/pyproject.toml
index 48c5e8dd0a5..f40ad4fb107 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,8 +31,7 @@ Markupsafe = "2.1.5"
 blinker = "1.7.0"
 furl = "2.1.3"
 elasticsearch2 = "2.5.1"
-elasticsearch = "6.8.2" # max version to support elasticsearch6
-elasticsearch-dsl = "6.4.0" # max version to support elasticsearch6
+elasticsearch6= "6.8.2"
 elasticsearch6-dsl = "6.4.0"
 elasticsearch8 = "8.19.3"
 elastic-transport = "8.17.1"

From 684a83f1937da1cd1eb2c21f389199ada165102a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 25 Mar 2026 14:04:48 +0200
Subject: [PATCH 007/100] remove elasticsearch-dsl

---
 api/base/elasticsearch_dsl_views.py        | 8 ++++----
 api/metrics/views.py                       | 2 +-
 conftest.py                                | 2 +-
 osf/management/commands/reindex_es6.py     | 2 +-
 osf/metrics/reporters/public_item_usage.py | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/api/base/elasticsearch_dsl_views.py b/api/base/elasticsearch_dsl_views.py
index 6199fd82d0e..ecf2825d4e8 100644
--- a/api/base/elasticsearch_dsl_views.py
+++ b/api/base/elasticsearch_dsl_views.py
@@ -3,7 +3,7 @@
 import datetime
 import typing
 
-import elasticsearch_dsl as edsl
+import elasticsearch6_dsl as edsl
 from rest_framework import generics, exceptions as drf_exceptions
 from rest_framework.settings import api_settings as drf_settings
 from api.base.settings.defaults import REPORT_FILENAME_FORMAT
@@ -23,7 +23,7 @@
 
 
 class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC):
-    '''abstract view class using `elasticsearch_dsl.Search` as a queryset-analogue
+    '''abstract view class using `elasticsearch6_dsl.Search` as a queryset-analogue
 
     builds a `Search` based on `self.get_default_search()` and the request's
     query parameters for filtering, sorting, and pagination -- fetches only
@@ -36,7 +36,7 @@ class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView,
 
     @abc.abstractmethod
     def get_default_search(self) -> edsl.Search | None:
-        '''the base `elasticsearch_dsl.Search` for this list, based on url path
+        '''the base `elasticsearch6_dsl.Search` for this list, based on url path
 
         (common jsonapi query parameters will be considered automatically)
         '''
@@ -95,7 +95,7 @@ def finalize_response(self, request, response, *args, **kwargs):
     # (filtering handled in-view to reuse logic from FilterMixin)
     filter_backends = ()
 
-    # note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on,
+    # note: because elasticsearch6_dsl.Search supports slicing and gives results when iterated on,
     #       it works fine with default pagination
 
     # override rest_framework.generics.GenericAPIView
diff --git a/api/metrics/views.py b/api/metrics/views.py
index 976837ec220..c6e4d56c9b9 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -7,7 +7,7 @@
 from django.utils import timezone
 
 from elasticsearch6.exceptions import NotFoundError, RequestError
-from elasticsearch_dsl.connections import get_connection
+from elasticsearch6_dsl.connections import get_connection
 
 from framework.auth.oauth_scopes import CoreScopes
 
diff --git a/conftest.py b/conftest.py
index 6a6be40a7d9..7665782cb34 100644
--- a/conftest.py
+++ b/conftest.py
@@ -4,7 +4,7 @@
 import re
 
 from django.db import transaction
-from elasticsearch_dsl.connections import connections
+from elasticsearch6_dsl.connections import connections
 from website import settings as osf_settings
 from elasticsearch_metrics.tests._test_util import RealElasticTestCase
 from faker import Factory
diff --git a/osf/management/commands/reindex_es6.py b/osf/management/commands/reindex_es6.py
index c37d0e34f2c..8961ea6fff1 100644
--- a/osf/management/commands/reindex_es6.py
+++ b/osf/management/commands/reindex_es6.py
@@ -4,7 +4,7 @@
 import logging
 
 from django.core.management.base import BaseCommand
-from elasticsearch_dsl import connections
+from elasticsearch6_dsl import connections
 from elasticsearch_metrics.registry import registry
 
 logger = logging.getLogger(__name__)
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index cc401d50bd7..7df405d385f 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -4,7 +4,7 @@
 
 import waffle
 if typing.TYPE_CHECKING:
-    import elasticsearch_dsl as edsl
+    import elasticsearch6_dsl as edsl
 
 import osf.features
 from osf.metadata.osf_gathering import OsfmapPartition

From 11516100b737a81e9c9aeb88819c786dfcffb214 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 25 Mar 2026 16:15:24 +0200
Subject: [PATCH 008/100] remove sleep() and refresh indices

---
 .../views/test_institution_department_list.py         | 11 ++++++-----
 .../views/test_institution_summary_metrics.py         |  7 ++++++-
 .../views/test_institution_user_metric_list.py        | 10 +++++++++-
 api_tests/metrics/test_composite_query.py             |  2 +-
 .../metrics/test_registries_moderation_metrics.py     |  4 ++--
 conftest.py                                           |  3 ++-
 6 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/api_tests/institutions/views/test_institution_department_list.py b/api_tests/institutions/views/test_institution_department_list.py
index c2a5c0fcf99..8b785504756 100644
--- a/api_tests/institutions/views/test_institution_department_list.py
+++ b/api_tests/institutions/views/test_institution_department_list.py
@@ -44,7 +44,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution):
             department_name='Old Department',
             public_project_count=1,
             private_project_count=1,
-        ).save(refresh=True)
+        ).save()
 
         _this_month = YearMonth.from_date(datetime.date.today())
 
@@ -56,7 +56,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution):
             department_name='New Department',
             public_project_count=1,
             private_project_count=1,
-        ).save(refresh=True)
+        ).save()
 
         # A second user entered the department
         InstitutionalUserReport(
@@ -66,7 +66,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution):
             department_name='New Department',
             public_project_count=1,
             private_project_count=1,
-        ).save(refresh=True)
+        ).save()
 
         # A new department with a single user to test sorting
         InstitutionalUserReport(
@@ -76,7 +76,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution):
             department_name='Smaller Department',
             public_project_count=1,
             private_project_count=1,
-        ).save(refresh=True)
+        ).save()
 
         # A user with no department
         InstitutionalUserReport(
@@ -85,7 +85,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution):
             institution_id=institution._id,
             public_project_count=1,
             private_project_count=1,
-        ).save(refresh=True)
+        ).save()
 
     @pytest.fixture()
     def admin(self, institution):
@@ -113,6 +113,7 @@ def test_auth(self, app, url, user, admin):
         assert resp.json['data'] == []
 
     def test_get(self, app, url, admin, institution, populate_counts):
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
         resp = app.get(url, auth=admin.auth)
 
         assert resp.json['data'] == [{
diff --git a/api_tests/institutions/views/test_institution_summary_metrics.py b/api_tests/institutions/views/test_institution_summary_metrics.py
index 41983458d2e..6dd6c5bbda3 100644
--- a/api_tests/institutions/views/test_institution_summary_metrics.py
+++ b/api_tests/institutions/views/test_institution_summary_metrics.py
@@ -84,6 +84,7 @@ def test_get_empty(self, app, url, institutional_admin):
         assert resp.json['meta'] == {'version': '2.0'}
 
     def test_get_report(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern)
         resp = app.get(url, auth=institutional_admin.auth)
         assert resp.status_code == 200
 
@@ -149,6 +150,7 @@ def test_get_report_with_multiple_months_and_institutions(
             monthly_logged_in_user_count=270,
             monthly_active_user_count=260,
         )
+        InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern)
 
         resp = app.get(url, auth=institutional_admin.auth)
         assert resp.status_code == 200
@@ -189,6 +191,7 @@ def test_get_with_valid_report_dates(self, app, url, institution, institutional_
             institution,
             user_count=4133,
         )
+        InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern)
 
         resp = app.get(f'{url}?report_yearmonth=2024-08', auth=institutional_admin.auth)
         assert resp.status_code == 200
@@ -213,6 +216,7 @@ def test_get_with_invalid_report_date(self, app, url, institution, institutional
             institution,
             user_count=999,
         )
+        InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern)
 
         # Request with an invalid report_date format
         resp = app.get(f'{url}?report_yearmonth=invalid-date', auth=institutional_admin.auth)
@@ -233,6 +237,7 @@ def test_get_without_report_date_uses_most_recent(self, app, url, institution, i
             institution,
             user_count=999,
         )
+        InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern)
 
         resp = app.get(url, auth=institutional_admin.auth)
         assert resp.status_code == 200
@@ -247,5 +252,5 @@ def _summary_report_factory(yearmonth, institution, **kwargs):
         institution_id=institution._id,
         **kwargs,
     )
-    report.save(refresh=True)
+    report.save()
     return report
diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py
index 0826dcd0161..d2b99da435f 100644
--- a/api_tests/institutions/views/test_institution_user_metric_list.py
+++ b/api_tests/institutions/views/test_institution_user_metric_list.py
@@ -89,6 +89,7 @@ def test_get_empty(self, app, url, institutional_admin):
         assert _resp.json['data'] == []
 
     def test_get_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
         _resp = app.get(url, auth=institutional_admin.auth)
         assert _resp.status_code == 200
         assert len(_resp.json['data']) == len(reports)
@@ -100,6 +101,7 @@ def test_get_reports(self, app, url, institutional_admin, institution, reports,
             assert len(response_object['attributes']['contacts']) == 0
 
     def test_filter_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
         for _query, _expected_user_ids in (
             ({'filter[department]': 'nunavum'}, set()),
             ({'filter[department]': 'incidentally'}, set()),
@@ -135,6 +137,7 @@ def test_filter_reports(self, app, url, institutional_admin, institution, report
             assert set(_user_ids(_resp)) == _expected_user_ids
 
     def test_sort_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
         for _query, _expected_user_id_list in (
             ({'sort': 'storage_byte_count'}, ['u_sparse', 'u_orc', 'u_blargl', 'u_orcomma']),
             ({'sort': '-storage_byte_count'}, ['u_orcomma', 'u_blargl', 'u_orc', 'u_sparse']),
@@ -144,6 +147,7 @@ def test_sort_reports(self, app, url, institutional_admin, institution, reports,
             assert list(_user_ids(_resp)) == _expected_user_id_list
 
     def test_paginate_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
         for _query, _expected_user_id_list in (
             ({'sort': 'storage_byte_count', 'page[size]': 2}, ['u_sparse', 'u_orc']),
             ({'sort': 'storage_byte_count', 'page[size]': 2, 'page': 2}, ['u_blargl', 'u_orcomma']),
@@ -178,6 +182,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu
             month_last_active='2018-02',
             month_last_login='2018-02',
         )
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
 
         resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth)
         assert resp.status_code == 200
@@ -281,6 +286,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu
                 str(736662999298 + i),
                 f'Jalen Hurts #{i}',
             ])
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
 
         # Make request for CSV format with page[size]=10
         resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth)
@@ -346,6 +352,7 @@ def test_get_report_format_table_json(self, app, url, institutional_admin, insti
             month_last_active='2018-02',
             month_last_login='2018-02',
         )
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
 
         resp = app.get(f'{url}?format=json_report', auth=institutional_admin.auth)
         assert resp.status_code == 200
@@ -411,6 +418,7 @@ def test_correct_number_of_contact_messages(self, app, url, institutional_admin,
             department_name='a department, or so, that happens, incidentally, to have commas',
             storage_byte_count=736662999298,
         )
+        InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern)
 
         receiver = user1
         with capture_notifications():
@@ -477,5 +485,5 @@ def _report_factory(yearmonth, institution, **kwargs):
         institution_id=institution._id,
         **kwargs,
     )
-    _report.save(refresh=True)
+    _report.save()
     return _report
diff --git a/api_tests/metrics/test_composite_query.py b/api_tests/metrics/test_composite_query.py
index 0cd0b3bb180..a983ebd74fb 100644
--- a/api_tests/metrics/test_composite_query.py
+++ b/api_tests/metrics/test_composite_query.py
@@ -75,7 +75,7 @@ def test_elasticsearch_agg_query(self, app, user, base_url, preprint):
             path=preprint.primary_file.path,
             timestamp=datetime(year=2020, month=2, day=1)
         )
-        time.sleep(1)  # gives ES some time to update
+        PreprintDownload._get_connection().indices.refresh(PreprintDownload._template_pattern)
 
         resp = app.post_json_api(post_url, payload, auth=user.auth)
         assert resp.status_code == 200
diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py
index 93469b1b3b5..7f59a0a417e 100644
--- a/api_tests/metrics/test_registries_moderation_metrics.py
+++ b/api_tests/metrics/test_registries_moderation_metrics.py
@@ -32,7 +32,7 @@ def test_record_transitions(self, registration):
                 registration.creator,
                 'Metrics is easy'
             )
-        time.sleep(1)
+        RegistriesModerationMetrics._get_connection().indices.refresh(RegistriesModerationMetrics._template_pattern)
 
         assert RegistriesModerationMetrics.search().count() == 1
         data = RegistriesModerationMetrics.search().execute()['hits']['hits'][0]['_source']
@@ -81,7 +81,7 @@ def test_registries_moderation_view(self, app, user, base_url, registration):
                 registration.creator,
                 'Metrics is easy'
             )
-        time.sleep(1)
+        RegistriesModerationMetrics._get_connection().indices.refresh(RegistriesModerationMetrics._template_pattern)
 
         res = app.get(base_url, auth=user.auth, expect_errors=True)
         data = res.json
diff --git a/conftest.py b/conftest.py
index 7665782cb34..232b788c0fb 100644
--- a/conftest.py
+++ b/conftest.py
@@ -151,7 +151,8 @@ def _es_metrics_marker(request):
         hosts=osf_settings.ELASTIC6_URI,
     )
 
-    class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True): ...
+    class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True):
+        ...
     es6_test_case = _Es6TestCase()
     es6_test_case.setUp()
     try:

From 4649800a5c85efb7d94daf99c82a001c9348b360 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 25 Mar 2026 17:17:15 +0200
Subject: [PATCH 009/100] remove unused imports, comment out

---
 api_tests/metrics/test_composite_query.py               | 1 -
 api_tests/metrics/test_registries_moderation_metrics.py | 1 -
 osf/metrics/metric_mixin.py                             | 6 +++---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/api_tests/metrics/test_composite_query.py b/api_tests/metrics/test_composite_query.py
index a983ebd74fb..016677c3a11 100644
--- a/api_tests/metrics/test_composite_query.py
+++ b/api_tests/metrics/test_composite_query.py
@@ -1,4 +1,3 @@
-import time
 import pytest
 from datetime import datetime
 from osf_tests.factories import (
diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py
index 7f59a0a417e..0f3dddb79b6 100644
--- a/api_tests/metrics/test_registries_moderation_metrics.py
+++ b/api_tests/metrics/test_registries_moderation_metrics.py
@@ -1,7 +1,6 @@
 import pytest
 from waffle.testutils import override_switch
 
-import time
 from osf import features
 from osf_tests.factories import RegistrationFactory, AuthUserFactory
 from osf.utils.workflows import RegistrationModerationStates, RegistrationModerationTriggers
diff --git a/osf/metrics/metric_mixin.py b/osf/metrics/metric_mixin.py
index df87d5123b1..68fa255b073 100644
--- a/osf/metrics/metric_mixin.py
+++ b/osf/metrics/metric_mixin.py
@@ -77,9 +77,9 @@ def _get_id_to_count(cls, size, metric_field, count_field, after=None, before=No
     # indices, determined from `after`
     @classmethod
     def search(cls, using=None, index=None, after=None, before=None, *args, **kwargs):
-        if not index and (before or after):
-            indices = cls._get_relevant_indices(after, before)
-            index = ','.join(indices)
+        # if not index and (before or after):
+        #     indices = cls._get_relevant_indices(after, before)
+        #     index = ','.join(indices)
         return super().search(using=using, index=index, *args, **kwargs)
 
     @classmethod

From 5ea0ed1bdbfac74fc910e7b095d049c712dc75ae Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 25 Mar 2026 11:47:09 -0400
Subject: [PATCH 010/100] chore: bump djelme dependency

---
 osf/metrics/metric_mixin.py                   |  6 +--
 .../management_commands/test_reindex_es6.py   |  2 +-
 poetry.lock                                   | 46 +++++++++----------
 pyproject.toml                                |  2 +-
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/osf/metrics/metric_mixin.py b/osf/metrics/metric_mixin.py
index 68fa255b073..df87d5123b1 100644
--- a/osf/metrics/metric_mixin.py
+++ b/osf/metrics/metric_mixin.py
@@ -77,9 +77,9 @@ def _get_id_to_count(cls, size, metric_field, count_field, after=None, before=No
     # indices, determined from `after`
     @classmethod
     def search(cls, using=None, index=None, after=None, before=None, *args, **kwargs):
-        # if not index and (before or after):
-        #     indices = cls._get_relevant_indices(after, before)
-        #     index = ','.join(indices)
+        if not index and (before or after):
+            indices = cls._get_relevant_indices(after, before)
+            index = ','.join(indices)
         return super().search(using=using, index=index, *args, **kwargs)
 
     @classmethod
diff --git a/osf_tests/management_commands/test_reindex_es6.py b/osf_tests/management_commands/test_reindex_es6.py
index 5e01be656a8..36158c18da6 100644
--- a/osf_tests/management_commands/test_reindex_es6.py
+++ b/osf_tests/management_commands/test_reindex_es6.py
@@ -10,7 +10,7 @@
     AuthUserFactory
 )
 
-from elasticsearch_metrics.field import Keyword
+from elasticsearch6_dsl import Keyword
 
 from tests.json_api_test_app import JSONAPITestApp
 
diff --git a/poetry.lock b/poetry.lock
index 530b6252e18..f0dca07d95c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
 
 [[package]]
 name = "amqp"
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"
-resolved_reference = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"
+reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"
+resolved_reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"
 
 [[package]]
 name = "django-extensions"
@@ -1186,7 +1186,7 @@ files = [
 [package.dependencies]
 autopep8 = "*"
 Django = ">=3.2"
-gprof2dot = ">=2017.09.19"
+gprof2dot = ">=2017.9.19"
 sqlparse = "*"
 
 [[package]]
@@ -1739,12 +1739,12 @@ files = [
 [package.dependencies]
 google-auth = ">=2.14.1,<3.0.dev0"
 googleapis-common-protos = ">=1.56.2,<2.0.dev0"
-proto-plus = ">=1.22.3,<2.0.0dev"
+proto-plus = ">=1.22.3,<2.0.0.dev0"
 protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
 requests = ">=2.18.0,<3.0.0.dev0"
 
 [package.extras]
-grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""]
+grpc = ["grpcio (>=1.33.2,<2.0.dev0)", "grpcio (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""]
 grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
 grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
 
@@ -1820,11 +1820,11 @@ files = [
 ]
 
 [package.dependencies]
-google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev"
-google-auth = ">=1.25.0,<3.0dev"
+google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0.dev0"
+google-auth = ">=1.25.0,<3.0.dev0"
 
 [package.extras]
-grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"]
+grpc = ["grpcio (>=1.38.0,<2.0.dev0)", "grpcio-status (>=1.38.0,<2.0.dev0)"]
 
 [[package]]
 name = "google-cloud-storage"
@@ -1839,15 +1839,15 @@ files = [
 ]
 
 [package.dependencies]
-google-api-core = ">=2.15.0,<3.0.0dev"
-google-auth = ">=2.26.1,<3.0dev"
-google-cloud-core = ">=2.3.0,<3.0dev"
-google-crc32c = ">=1.0,<2.0dev"
+google-api-core = ">=2.15.0,<3.0.0.dev0"
+google-auth = ">=2.26.1,<3.0.dev0"
+google-cloud-core = ">=2.3.0,<3.0.dev0"
+google-crc32c = ">=1.0,<2.0.dev0"
 google-resumable-media = ">=2.6.0"
-requests = ">=2.18.0,<3.0.0dev"
+requests = ">=2.18.0,<3.0.0.dev0"
 
 [package.extras]
-protobuf = ["protobuf (<5.0.0dev)"]
+protobuf = ["protobuf (<5.0.0.dev0)"]
 
 [[package]]
 name = "google-crc32c"
@@ -1902,11 +1902,11 @@ files = [
 ]
 
 [package.dependencies]
-google-crc32c = ">=1.0,<2.0dev"
+google-crc32c = ">=1.0,<2.0.dev0"
 
 [package.extras]
-aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"]
-requests = ["requests (>=2.18.0,<3.0.0dev)"]
+aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "google-auth (>=1.22.0,<2.0.dev0)"]
+requests = ["requests (>=2.18.0,<3.0.0.dev0)"]
 
 [[package]]
 name = "googleapis-common-protos"
@@ -2285,7 +2285,7 @@ files = [
 
 [package.dependencies]
 attrs = ">=22.2.0"
-jsonschema-specifications = ">=2023.03.6"
+jsonschema-specifications = ">=2023.3.6"
 referencing = ">=0.28.4"
 rpds-py = ">=0.7.1"
 
@@ -3042,7 +3042,7 @@ files = [
 ]
 
 [package.dependencies]
-protobuf = ">=3.19.0,<6.0.0dev"
+protobuf = ">=3.19.0,<6.0.0.dev0"
 
 [package.extras]
 testing = ["google-api-core (>=1.31.5)"]
@@ -4072,10 +4072,10 @@ files = [
 ]
 
 [package.dependencies]
-botocore = ">=1.33.2,<2.0a.0"
+botocore = ">=1.33.2,<2.0a0"
 
 [package.extras]
-crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
+crt = ["botocore[crt] (>=1.33.2,<2.0a0)"]
 
 [[package]]
 name = "schema"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "755718b3333d5fe7983ac875532429e80eb2d45684002ae9212bc96a7800d014"
+content-hash = "c3108a036ae092e35f7696ffe230e271b774bb12e546db77bb8b12b5fb6eca7d"
diff --git a/pyproject.toml b/pyproject.toml
index f40ad4fb107..b40cdc704ab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "7a7f664469070dd52dc4d9401f6b6d2d9fe7ddf0"} # branch is feature/pin-esdsl
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 674f963f388689fcd67bbbcbad26468e09cc86b5 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 25 Mar 2026 10:29:17 -0400
Subject: [PATCH 011/100] wip: es8 djelme records (migration targets)

---
 osf/metrics/es8_metrics.py | 221 +++++++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 osf/metrics/es8_metrics.py

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
new file mode 100644
index 00000000000..ec20215449e
--- /dev/null
+++ b/osf/metrics/es8_metrics.py
@@ -0,0 +1,221 @@
+from __future__ import annotations
+import datetime
+
+import elasticsearch8.dsl as esdsl
+import elasticsearch_metrics.imps.elastic8 as djelme
+
+from osf.metrics.utils import YearMonth
+
+
+###
+# custom dsl fields
+
+class YearmonthField(esdsl.Date):  # date field using the 'strict_year_month' format; values round-trip as YearMonth <-> 'YYYY-MM'
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs, format='strict_year_month')
+
+    def deserialize(self, data):
+        if isinstance(data, int):
+            # elasticsearch stores dates as milliseconds since the unix epoch (UTC); convert in UTC so a local offset cannot shift the month
+            _as_datetime = datetime.datetime.fromtimestamp(data // 1000, tz=datetime.timezone.utc)
+            return YearMonth.from_date(_as_datetime)
+        elif data is None:
+            return None
+        try:
+            return YearMonth.from_any(data)
+        except ValueError as _error:
+            raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') from _error
+
+    def serialize(self, data):
+        if isinstance(data, str):
+            return data
+        elif isinstance(data, YearMonth):
+            return str(data)
+        elif isinstance(data, (datetime.datetime, datetime.date)):
+            return str(YearMonth.from_date(data))
+        elif data is None:
+            return None
+        else:
+            raise ValueError(f'unsure how to serialize "{data}" (of type {type(data)}) as YYYY-MM')
+
+
+###
+# inner objects for events
+
+route_prefix_analyzer = esdsl.analyzer(
+    'route_prefix_analyzer',
+    tokenizer=esdsl.tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.'),
+)
+
+
+class PageviewInfo(esdsl.InnerDoc):
+    """PageviewInfo
+
+    for CountedAuthUsage generated by viewing a web page
+    """
+    # fields that should be provided
+    referer_url: str
+    page_url: str
+    page_title: str
+    route_name: str = esdsl.mapped_field(esdsl.Keyword(
+        fields={
+            'by_prefix': esdsl.Text(analyzer=route_prefix_analyzer),
+        },
+    ))
+
+    # fields autofilled from the above (see `_autofill_fields`)
+    page_path: str
+    referer_domain: str
+    hour_of_day: str
+
+
+###
+# Event records
+
+class OsfCountedUsageRecord(djelme.CountedUsageRecord):
+    '''
+
+    inherited fields:
+        platform_iri: str
+        database_iri: str
+        item_iri: str
+        sessionhour_id: str
+        within_iris: list[str]
+    '''
+    # osf-specific fields
+    item_osfid: str
+    item_type: str
+    item_public: bool
+    user_is_authenticated: bool
+    action_labels: list[str]
+    pageview_info: PageviewInfo
+
+
+###
+# Reusable inner objects for reports
+
+class RunningTotal(esdsl.InnerDoc):
+    total: int
+    total_daily: int
+
+
+class FileRunningTotals(esdsl.InnerDoc):
+    total: int
+    public: int
+    private: int
+    total_daily: int
+    public_daily: int
+    private_daily: int
+
+
+class NodeRunningTotals(esdsl.InnerDoc):
+    total: int
+    total_excluding_spam: int
+    public: int
+    private: int
+    total_daily: int
+    total_daily_excluding_spam: int
+    public_daily: int
+    private_daily: int
+
+
+class RegistrationRunningTotals(esdsl.InnerDoc):
+    total: int
+    public: int
+    embargoed: int
+    embargoed_v2: int
+    withdrawn: int
+    total_daily: int
+    public_daily: int
+    embargoed_daily: int
+    embargoed_v2_daily: int
+    withdrawn_daily: int
+
+
+###
+# Cyclic reports
+
+
+class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+    node_confirmed_spam: int
+    node_confirmed_ham: int
+    node_flagged: int
+    registration_confirmed_spam: int
+    registration_confirmed_ham: int
+    registration_flagged: int
+    preprint_confirmed_spam: int
+    preprint_confirmed_ham: int
+    preprint_flagged: int
+    user_marked_as_spam: int
+    user_marked_as_ham: int
+
+
+class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+    # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',)
+    institution_id: str
+    # user info:
+    user_id: str
+    user_name: str
+    department_name: str
+    month_last_login = YearmonthField()
+    month_last_active = YearmonthField()
+    account_creation_date = YearmonthField()
+    orcid_id: str
+    # counts:
+    public_project_count: int
+    private_project_count: int
+    public_registration_count: int
+    embargoed_registration_count: int
+    published_preprint_count: int
+    public_file_count: int = esdsl.mapped_field(esdsl.Long())
+    storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
+
+
+class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+    UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', )
+    institution_id: str
+    user_count: int
+    public_project_count: int
+    private_project_count: int
+    public_registration_count: int
+    embargoed_registration_count: int
+    published_preprint_count: int
+    storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
+    public_file_count: int = esdsl.mapped_field(esdsl.Long())
+    monthly_logged_in_user_count: int = esdsl.mapped_field(esdsl.Long())
+    monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long())
+
+
+class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+    # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid')
+
+    # where noted, fields are meant to correspond to defined terms from COUNTER
+    # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
+    # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
+    item_osfid: str
+    item_type: list[str]           # counter:Data-Type
+    provider_id: list[str]         # counter:Database(?)
+    platform_iri: list[str]        # counter:Platform
+
+    # view counts include views on components or files contained by this item
+    view_count: int = esdsl.mapped_field(esdsl.Long())
+    view_session_count: int = esdsl.mapped_field(esdsl.Long())
+    cumulative_view_count: int = esdsl.mapped_field(esdsl.Long())
+    cumulative_view_session_count: int = esdsl.mapped_field(esdsl.Long())
+
+    # download counts of this item only (not including contained components or files)
+    download_count: int = esdsl.mapped_field(esdsl.Long())
+    download_session_count: int = esdsl.mapped_field(esdsl.Long())
+    cumulative_download_count: int = esdsl.mapped_field(esdsl.Long())
+    cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long())
+
+
+class PrivateSpamMetricsReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+    node_oopspam_flagged: int
+    node_oopspam_hammed: int
+    node_akismet_flagged: int
+    node_akismet_hammed: int
+    preprint_oopspam_flagged: int
+    preprint_oopspam_hammed: int
+    preprint_akismet_flagged: int
+    preprint_akismet_hammed: int

From 2e73161b508a73e192ae3675f60ec05569502848 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 1 Apr 2026 00:52:14 +0300
Subject: [PATCH 012/100] add new metrics

---
 osf/metrics/es8_metrics.py | 166 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 162 insertions(+), 4 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index ec20215449e..4c1b2de4a2d 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 import datetime
-
+import enum
 import elasticsearch8.dsl as esdsl
 import elasticsearch_metrics.imps.elastic8 as djelme
 
@@ -91,6 +91,100 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     pageview_info: PageviewInfo
 
 
+class CountedAuthUsage(djelme.CountedUsageRecord):
+    """CountedAuthUsage
+
+    Something was used! Let's quickly take note of that and
+    move on, then come back later to query/analyze/investigate.
+
+    Aim to support a COUNTER-style reporting api
+    (see https://cop5.projectcounter.org/en/5.0.2/)
+    """
+
+    # where noted, fields correspond to defined terms from COUNTER
+    # https://cop5.projectcounter.org/en/5.0.2/appendices/a-glossary-of-terms.html
+    platform_iri: str
+    provider_id: str
+    session_id: str
+    item_guid: str
+    item_type: str
+    surrounding_guids: list[str]
+    item_public: bool
+    user_is_authenticated: bool
+    action_labels: list[str]
+    class ActionLabel(enum.Enum):
+        SEARCH = 'search'       # counter:Search
+        VIEW = 'view'           # counter:Investigation
+        DOWNLOAD = 'download'   # counter:Request
+        WEB = 'web'             # counter:Regular (aka "pageview")
+        API = 'api'             # counter:TDM (aka "non-web api usage")
+        # TODO: count api usage, distinguish between web and non-web api requests
+
+    # pageviews get additional info to support the "node analytics" view
+    # (see `api.metrics.views.NodeAnalyticsQuery`)
+    pageview_info: PageviewInfo
+
+    class Meta:
+        dynamic = djelme.MetaField('strict')
+        source = djelme.MetaField(enabled=True)
+
+
+class BasePreprintMetrics(djelme.CountedUsageRecord):
+    '''
+        inherited fields:
+            platform_iri: str
+            database_iri: str
+            item_iri: str
+            sessionhour_id: str
+            within_iris: list[str]
+    '''
+    count: int
+    provider_id: str
+    user_id: str
+    preprint_id: str
+    version: str
+    path: str
+
+    class Index:
+        settings = {
+            'number_of_shards': 1,
+            'number_of_replicas': 1,
+            'refresh_interval': '1s',
+        }
+
+    class Meta:
+        abstract = True
+        source = djelme.MetaField(enabled=True)
+
+
+class PreprintView(BasePreprintMetrics):
+    pass
+
+
+class PreprintDownload(BasePreprintMetrics):
+    pass
+
+
+class RegistriesModerationMetrics(djelme.CountedUsageRecord):
+    registration_id: str
+    provider_id: str
+    trigger: str
+    from_state: str
+    to_state: str
+    user_id: str
+    comment: str
+
+    class Index:
+        settings = {
+            'number_of_shards': 1,
+            'number_of_replicas': 1,
+            'refresh_interval': '1s',
+        }
+
+    class Meta:
+        source = djelme.MetaField(enabled=True)
+
+
 ###
 # Reusable inner objects for reports
 
@@ -132,10 +226,74 @@ class RegistrationRunningTotals(esdsl.InnerDoc):
     withdrawn_daily: int
 
 
+class UsageByStorageAddon(esdsl.InnerDoc):
+    addon_shortname: str
+    enabled_usersettings: RunningTotal
+    linked_usersettings: RunningTotal
+    deleted_usersettings: RunningTotal
+    usersetting_links: RunningTotal
+    connected_nodesettings: RunningTotal
+    disconnected_nodesettings: RunningTotal
+    deleted_nodesettings: RunningTotal
+
+
 ###
 # Cyclic reports
 
 
+class StorageAddonUsage(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    usage_by_addon: UsageByStorageAddon
+
+
+class DownloadCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    daily_file_downloads: int
+
+
+class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',)
+
+    institution_id: str
+    institution_name: str
+    users: RunningTotal
+    nodes: NodeRunningTotals
+    projects: NodeRunningTotals
+    registered_nodes: RegistrationRunningTotals
+    registered_projects: RegistrationRunningTotals
+
+
+class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',)
+
+    domain_name: str
+    domain_name: int
+
+
+class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    nodes:  NodeRunningTotals
+    projects: NodeRunningTotals
+    registered_nodes: RegistrationRunningTotals
+    registered_projects: RegistrationRunningTotals
+
+
+class OsfstorageFileCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    files: FileRunningTotals
+
+
+class PreprintSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',)
+    provider_key: str
+    preprint_count: int
+
+
+class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+    active: int
+    deactivated: int
+    merged: int
+    new_users_daily: int
+    new_users_with_institution_daily: int
+    unconfirmed: int
+
+
 class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
     node_confirmed_spam: int
     node_confirmed_ham: int
@@ -151,7 +309,7 @@ class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
 
 
 class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
-    # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',)
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',)
     institution_id: str
     # user info:
     user_id: str
@@ -172,7 +330,7 @@ class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHL
 
 
 class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
-    UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', )
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', )
     institution_id: str
     user_count: int
     public_project_count: int
@@ -187,7 +345,7 @@ class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelm
 
 
 class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
-    # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid')
+    UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid')
 
     # where noted, fields are meant to correspond to defined terms from COUNTER
     # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html

From 4b4a4780cadb0361f69757a1db290b08b6d6178e Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 6 Apr 2026 14:29:56 +0300
Subject: [PATCH 013/100] fix flake8: drop extra space in NodeSummaryReport.nodes annotation

---
 osf/metrics/es8_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 4c1b2de4a2d..e9ed147e858 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -269,7 +269,7 @@ class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
 
 
 class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
-    nodes:  NodeRunningTotals
+    nodes: NodeRunningTotals
     projects: NodeRunningTotals
     registered_nodes: RegistrationRunningTotals
     registered_projects: RegistrationRunningTotals

From d3b48e401e9713fda09ebc3633ae2c635a31daba Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 8 Apr 2026 17:46:42 +0300
Subject: [PATCH 014/100] add tests, use new version of djelme, consolidate
 into OsfCountedUsageRecord

---
 osf/metrics/es8_metrics.py            | 151 ++++++++++----------------
 osf_tests/metrics/test_es8_metrics.py |  42 +++++++
 poetry.lock                           |   6 +-
 pyproject.toml                        |   2 +-
 4 files changed, 102 insertions(+), 99 deletions(-)
 create mode 100644 osf_tests/metrics/test_es8_metrics.py

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index e9ed147e858..666f9bae359 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -1,8 +1,8 @@
-from __future__ import annotations
 import datetime
 import enum
 import elasticsearch8.dsl as esdsl
 import elasticsearch_metrics.imps.elastic8 as djelme
+from urllib.parse import urlsplit
 
 from osf.metrics.utils import YearMonth
 
@@ -53,6 +53,16 @@ class PageviewInfo(esdsl.InnerDoc):
 
     for CountedAuthUsage generated by viewing a web page
     """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.page_path: str = ''
+        if self.page_url: self.page_path = urlsplit(self.page_url).path.rstrip('/')
+        self.referer_domain: str = ''
+        if self.referer_url: self.referer_domain = urlsplit(self.referer_url).netloc
+        self.hour_of_day: int = 0
+        if self.timestamp: self.hour_of_day = self.timestamp.hour
+
     # fields that should be provided
     referer_url: str
     page_url: str
@@ -63,11 +73,9 @@ class PageviewInfo(esdsl.InnerDoc):
         },
     ))
 
-    # fields autofilled from the above (see `_autofill_fields`)
     page_path: str
     referer_domain: str
-    hour_of_day: str
-
+    hour_of_day: int
 
 ###
 # Event records
@@ -90,82 +98,15 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     action_labels: list[str]
     pageview_info: PageviewInfo
 
+class ActionLabel(enum.Enum):
+    SEARCH = 'search'  # counter:Search
+    VIEW = 'view'  # counter:Investigation
+    DOWNLOAD = 'download'  # counter:Request
+    WEB = 'web'  # counter:Regular (aka "pageview")
+    API = 'api'  # counter:TDM (aka "non-web api usage")
 
-class CountedAuthUsage(djelme.CountedUsageRecord):
-    """CountedAuthUsage
-
-    Something was used! Let's quickly take note of that and
-    move on, then come back later to query/analyze/investigate.
-
-    Aim to support a COUNTER-style reporting api
-    (see https://cop5.projectcounter.org/en/5.0.2/)
-    """
 
-    # where noted, fields correspond to defined terms from COUNTER
-    # https://cop5.projectcounter.org/en/5.0.2/appendices/a-glossary-of-terms.html
-    platform_iri: str
-    provider_id: str
-    session_id: str
-    item_guid: str
-    item_type: str
-    surrounding_guids: list[str]
-    item_public: bool
-    user_is_authenticated: bool
-    action_labels: list[str]
-    class ActionLabel(enum.Enum):
-        SEARCH = 'search'       # counter:Search
-        VIEW = 'view'           # counter:Investigation
-        DOWNLOAD = 'download'   # counter:Request
-        WEB = 'web'             # counter:Regular (aka "pageview")
-        API = 'api'             # counter:TDM (aka "non-web api usage")
-        # TODO: count api usage, distinguish between web and non-web api requests
-
-    # pageviews get additional info to support the "node analytics" view
-    # (see `api.metrics.views.NodeAnalyticsQuery`)
-    pageview_info: PageviewInfo
-
-    class Meta:
-        dynamic = djelme.MetaField('strict')
-        source = djelme.MetaField(enabled=True)
-
-
-class BasePreprintMetrics(djelme.CountedUsageRecord):
-    '''
-        inherited fields:
-            platform_iri: str
-            database_iri: str
-            item_iri: str
-            sessionhour_id: str
-            within_iris: list[str]
-    '''
-    count: int
-    provider_id: str
-    user_id: str
-    preprint_id: str
-    version: str
-    path: str
-
-    class Index:
-        settings = {
-            'number_of_shards': 1,
-            'number_of_replicas': 1,
-            'refresh_interval': '1s',
-        }
-
-    class Meta:
-        abstract = True
-        source = djelme.MetaField(enabled=True)
-
-
-class PreprintView(BasePreprintMetrics):
-    pass
-
-
-class PreprintDownload(BasePreprintMetrics):
-    pass
-
-
-class RegistriesModerationMetrics(djelme.CountedUsageRecord):
+class Es8RegistriesModerationMetrics(djelme.EventRecord):
     registration_id: str
     provider_id: str
     trigger: str
@@ -181,9 +122,6 @@ class Index:
             'refresh_interval': '1s',
         }
 
-    class Meta:
-        source = djelme.MetaField(enabled=True)
-
 
 ###
 # Reusable inner objects for reports
@@ -241,15 +179,20 @@ class UsageByStorageAddon(esdsl.InnerDoc):
 # Cyclic reports
 
 
-class StorageAddonUsage(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
-    usage_by_addon: UsageByStorageAddon
+class Es8StorageAddonUsage(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
+
+    usage_by_addon: list[UsageByStorageAddon]
+
 
+class Es8DownloadCountReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
 
-class DownloadCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
     daily_file_downloads: int
 
 
-class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8InstitutionSummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',)
 
     institution_id: str
@@ -261,31 +204,40 @@ class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY
     registered_projects: RegistrationRunningTotals
 
 
-class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8NewUserDomainReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',)
 
     domain_name: str
-    domain_name: int
+    new_user_count: int
 
 
-class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8NodeSummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
+
     nodes: NodeRunningTotals
     projects: NodeRunningTotals
     registered_nodes: RegistrationRunningTotals
     registered_projects: RegistrationRunningTotals
 
 
-class OsfstorageFileCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8OsfstorageFileCountReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
+
     files: FileRunningTotals
 
 
-class PreprintSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8PreprintSummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
+
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',)
     provider_key: str
     preprint_count: int
 
 
-class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
+class Es8UserSummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 3
+
     active: int
     deactivated: int
     merged: int
@@ -294,7 +246,9 @@ class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY):
     unconfirmed: int
 
 
-class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+class Es8SpamSummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 2
+
     node_confirmed_spam: int
     node_confirmed_ham: int
     node_flagged: int
@@ -308,8 +262,10 @@ class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
     user_marked_as_ham: int
 
 
-class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+class Es8InstitutionalUserReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 2
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',)
+
     institution_id: str
     # user info:
     user_id: str
@@ -329,8 +285,10 @@ class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHL
     storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 2
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', )
+
     institution_id: str
     user_count: int
     public_project_count: int
@@ -344,7 +302,8 @@ class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelm
     monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+class Es8PublicItemUsageReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 2
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid')
 
     # where noted, fields are meant to correspond to defined terms from COUNTER
@@ -368,7 +327,9 @@ class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY)
     cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class PrivateSpamMetricsReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY):
+class Es8PrivateSpamMetricsReport(djelme.CyclicRecord):
+    CYCLE_TIMEDEPTH = 2
+
     node_oopspam_flagged: int
     node_oopspam_hammed: int
     node_akismet_flagged: int
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
new file mode 100644
index 00000000000..2afca72174b
--- /dev/null
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -0,0 +1,42 @@
+from datetime import datetime
+
+from osf.metrics.es8_metrics import (
+    Es8DownloadCountReport,
+    Es8UserSummaryReport,
+    OsfCountedUsageRecord,
+    PageviewInfo
+)
+
+
+class TestEs8Metrics:
+    def test_import_all_reports(self):
+        assert True
+
+    def test_instantiate_of_reports(self):
+        download_report = Es8DownloadCountReport()
+        assert hasattr(download_report, 'daily_file_downloads')
+        assert download_report.daily_file_downloads is None
+
+        user_report = Es8UserSummaryReport()
+        assert hasattr(user_report, 'active')
+        assert user_report.active is None
+
+    def test_nested_pageview(self):
+        usage = OsfCountedUsageRecord(
+            pageview_info={
+                "page_url": "https://example.com",
+                "referer_url": "https://google.com",
+            }
+        )
+        assert usage.pageview_info is not None
+
+    def test_pageview_info_autofill(self):
+        obj = PageviewInfo(
+            page_url="https://example.com/path/test",
+            referer_url="https://google.com",
+            timestamp=datetime(2024, 1, 1, 15, 0),
+        )
+
+        assert obj.page_path == "/path/test"
+        assert obj.referer_domain == "google.com"
+        assert obj.hour_of_day == 15
diff --git a/poetry.lock b/poetry.lock
index f0dca07d95c..5bbe2ae1f49 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"
-resolved_reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"
+reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
+resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "c3108a036ae092e35f7696ffe230e271b774bb12e546db77bb8b12b5fb6eca7d"
+content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922"
diff --git a/pyproject.toml b/pyproject.toml
index b40cdc704ab..375b8cacd25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From e4bec9dfa9f5e760bc11df3ce991af32a56e300a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Thu, 9 Apr 2026 10:45:01 +0300
Subject: [PATCH 015/100] add imports to init, flake8

---
 osf/metrics/__init__.py               | 18 ++++++++++++++++++
 osf/metrics/es8_metrics.py            |  9 ++++++---
 osf_tests/metrics/test_es8_metrics.py | 12 ++++++------
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py
index 0e7b1a1cf32..b2c8af54999 100644
--- a/osf/metrics/__init__.py
+++ b/osf/metrics/__init__.py
@@ -18,6 +18,17 @@
     UserSummaryReport,
 )
 
+from .es8_metrics import (
+    Es8DownloadCountReport,
+    Es8UserSummaryReport,
+    Es8NodeSummaryReport,
+    Es8SpamSummaryReport,
+    Es8InstitutionSummaryReport,
+    Es8NewUserDomainReport,
+    Es8OsfstorageFileCountReport,
+    Es8StorageAddonUsage,
+)
+
 DAILY_REPORTS = (
     DownloadCountReport,
     InstitutionSummaryReport,
@@ -27,6 +38,13 @@
     PreprintSummaryReport,
     StorageAddonUsage,
     UserSummaryReport,
+    Es8DownloadCountReport,
+    Es8InstitutionSummaryReport,
+    Es8NewUserDomainReport,
+    Es8NodeSummaryReport,
+    Es8OsfstorageFileCountReport,
+    Es8StorageAddonUsage,
+    Es8UserSummaryReport
 )
 
 
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 666f9bae359..020a9c72c80 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -57,11 +57,14 @@ class PageviewInfo(esdsl.InnerDoc):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.page_path: str = ''
-        if self.page_url: self.page_path = urlsplit(self.page_url).path.rstrip('/')
+        if self.page_url:
+            self.page_path = urlsplit(self.page_url).path.rstrip('/')
         self.referer_domain: str = ''
-        if self.referer_url: self.referer_domain = urlsplit(self.referer_url).netloc
+        if self.referer_url:
+            self.referer_domain = urlsplit(self.referer_url).netloc
         self.hour_of_day: int = 0
-        if self.timestamp: self.hour_of_day = self.timestamp.hour
+        if self.timestamp:
+            self.hour_of_day = self.timestamp.hour
 
     # fields that should be provided
     referer_url: str
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 2afca72174b..1158836b688 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -24,19 +24,19 @@ def test_instantiate_of_reports(self):
     def test_nested_pageview(self):
         usage = OsfCountedUsageRecord(
             pageview_info={
-                "page_url": "https://example.com",
-                "referer_url": "https://google.com",
+                'page_url': 'https://example.com',
+                'referer_url': 'https://google.com',
             }
         )
         assert usage.pageview_info is not None
 
     def test_pageview_info_autofill(self):
         obj = PageviewInfo(
-            page_url="https://example.com/path/test",
-            referer_url="https://google.com",
+            page_url='https://example.com/path/test',
+            referer_url='https://google.com',
             timestamp=datetime(2024, 1, 1, 15, 0),
         )
 
-        assert obj.page_path == "/path/test"
-        assert obj.referer_domain == "google.com"
+        assert obj.page_path == '/path/tes'
+        assert obj.referer_domain == 'google.com'
         assert obj.hour_of_day == 15

From ee515ef615b2363724aba1d445ef7e4e15f89c4a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Thu, 9 Apr 2026 16:39:44 +0300
Subject: [PATCH 016/100] fix test, imports, flake8

---
 .docker-compose.env                   | 2 +-
 osf/metrics/__init__.py               | 1 -
 osf_tests/metrics/test_es8_metrics.py | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.docker-compose.env b/.docker-compose.env
index 449c9747adf..444788ecb46 100644
--- a/.docker-compose.env
+++ b/.docker-compose.env
@@ -6,7 +6,7 @@ DOMAIN=http://localhost:5000/
 INTERNAL_DOMAIN=http://192.168.168.167:5000/
 API_DOMAIN=http://localhost:8000/
 ELASTIC_URI=192.168.168.167:9200
-ELASTIC6_URI=192.168.168.167:9201
+ELASTIC6_URI=http://192.168.168.167:9201
 ELASTIC8_URI=http://192.168.168.167:9202
 OSF_DB_HOST=192.168.168.167
 DB_HOST=192.168.168.167
diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py
index b2c8af54999..6cef14f5cf9 100644
--- a/osf/metrics/__init__.py
+++ b/osf/metrics/__init__.py
@@ -22,7 +22,6 @@
     Es8DownloadCountReport,
     Es8UserSummaryReport,
     Es8NodeSummaryReport,
-    Es8SpamSummaryReport,
     Es8InstitutionSummaryReport,
     Es8NewUserDomainReport,
     Es8OsfstorageFileCountReport,
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 1158836b688..3d48a3d35c4 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -37,6 +37,6 @@ def test_pageview_info_autofill(self):
             timestamp=datetime(2024, 1, 1, 15, 0),
         )
 
-        assert obj.page_path == '/path/tes'
+        assert obj.page_path == '/path/test'
         assert obj.referer_domain == 'google.com'
         assert obj.hour_of_day == 15

From ca60b58e0dc08d9f81ca085df45f43792d3ed252 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 10 Apr 2026 17:23:02 +0300
Subject: [PATCH 017/100] add security, flake8, fixes, add to test-build.yml

---
 .docker-compose.env              |  5 ++++-
 .github/workflows/test-build.yml | 18 ++++++++++++++++++
 api/base/settings/defaults.py    |  8 +++++++-
 docker-compose.yml               | 13 ++++++++++++-
 website/settings/defaults.py     |  5 ++++-
 5 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/.docker-compose.env b/.docker-compose.env
index 444788ecb46..2542d16e841 100644
--- a/.docker-compose.env
+++ b/.docker-compose.env
@@ -6,8 +6,11 @@ DOMAIN=http://localhost:5000/
 INTERNAL_DOMAIN=http://192.168.168.167:5000/
 API_DOMAIN=http://localhost:8000/
 ELASTIC_URI=192.168.168.167:9200
-ELASTIC6_URI=http://192.168.168.167:9201
+ELASTIC6_URI=192.168.168.167:9201
 ELASTIC8_URI=http://192.168.168.167:9202
+ELASTIC8_CERT_PATH=/elastic8_certs/ca/ca.crt
+ELASTIC8_USERNAME=elastic
+ELASTIC8_SECRET=secretsecret
 OSF_DB_HOST=192.168.168.167
 DB_HOST=192.168.168.167
 REDIS_HOST=redis://192.168.168.167:6379
diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 33942968529..0e8541acf2a 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -64,6 +64,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -84,6 +92,8 @@ jobs:
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
+      env:
+          ELASTIC8_URL: http://localhost:9202
 
   api1_and_js:
     runs-on: ubuntu-22.04
@@ -208,6 +218,14 @@ jobs:
       checks: write
     needs: build-cache
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
 
diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index 816586ffcfb..42e8d9bd495 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -316,7 +316,7 @@
 HASHIDS_SALT = 'pinkhimalayan'
 
 # django-elasticsearch-metrics
-DJELME_AUTOSETUP = True
+# DJELME_AUTOSETUP = True
 DJELME_BACKENDS = {
     'osfmetrics_es6': {
         'elasticsearch_metrics.imps.elastic6': {
@@ -327,6 +327,12 @@
     'osfmetrics_es8': {
         'elasticsearch_metrics.imps.elastic8': {
             'hosts': osf_settings.ELASTIC8_URI,
+            'ca_certs': osf_settings.ELASTIC8_CERT_PATH,
+            'basic_auth': (
+                (osf_settings.ELASTIC8_USERNAME, osf_settings.ELASTIC8_SECRET)
+                if osf_settings.ELASTIC8_SECRET is not None
+                else None
+            ),
         },
     },
 }
diff --git a/docker-compose.yml b/docker-compose.yml
index f26c3617b67..09aedd58247 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -82,8 +82,19 @@ services:
     image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11
     platform: linux/arm64
     environment:
-      - xpack.security.enabled=false
+      - ELASTIC_PASSWORD=secretsecret
+      - node.name=singlenode
       - discovery.type=single-node
+      - xpack.security.enabled=true
+      - xpack.security.http.ssl.enabled=true
+      - xpack.security.http.ssl.key=/elastic8_certs/singlenode/singlenode.key
+      - xpack.security.http.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt
+      - xpack.security.http.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt
+      - xpack.security.transport.ssl.enabled=true
+      - xpack.security.transport.ssl.key=/elastic8_certs/singlenode/singlenode.key
+      - xpack.security.transport.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt
+      - xpack.security.transport.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt
+      - xpack.security.transport.ssl.verification_mode=certificate
     ports:
       - 9202:9200
     volumes:
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index d0ae58dc863..1e8032cc95c 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -113,7 +113,10 @@ def parent_dir(path):
 SEARCH_ENGINE = 'elastic'  # Can be 'elastic', or None
 ELASTIC_URI = '127.0.0.1:9200'
 ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201')
-ELASTIC8_URI = os.environ.get('ELASTIC8_URI', '127.0.0.1:9202')
+ELASTIC8_URI = os.environ.get('ELASTIC8_URI')
+ELASTIC8_CERT_PATH = os.environ.get('ELASTIC8_CERT_PATH')
+ELASTIC8_USERNAME = os.environ.get('ELASTIC8_USERNAME', 'elastic')
+ELASTIC8_SECRET = os.environ.get('ELASTIC8_SECRET')
 ELASTIC_TIMEOUT = 10
 ELASTIC_INDEX = 'website'
 ELASTIC_KWARGS = {

From 080daf69dbcd839ed7d712c7f78053b13097b1e1 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Sat, 11 Apr 2026 00:01:31 +0300
Subject: [PATCH 018/100] test-build: rework elasticsearch8 service and ELASTIC8_URL env across jobs

---
 .github/workflows/test-build.yml | 41 +++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 0e8541acf2a..6aa39e39800 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -12,6 +12,7 @@ env:
   OSF_DB_PORT: 5432
   OSF_DB_PASSWORD: postgres
   GITHUB_ACTIONS: true
+  ELASTIC8_URL: http://localhost:9202
 
 jobs:
   build-cache:
@@ -37,6 +38,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -57,6 +66,8 @@ jobs:
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
+      env:
+        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
 
   website:
     runs-on: ubuntu-22.04
@@ -93,7 +104,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-          ELASTIC8_URL: http://localhost:9202
+        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
 
   api1_and_js:
     runs-on: ubuntu-22.04
@@ -101,6 +112,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -123,6 +142,8 @@ jobs:
       - name: Upload report
         if: (success() || failure())    # run this step even if previous step failed
         uses: ./.github/actions/gen-report
+        env:
+          ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
 
   api2:
     runs-on: ubuntu-22.04
@@ -130,6 +151,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -150,6 +179,8 @@ jobs:
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
+      env:
+        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
 
   api3_and_osf:
     runs-on: ubuntu-22.04
@@ -218,14 +249,6 @@ jobs:
       checks: write
     needs: build-cache
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
 

From fde32a4ee09debee75af5523088fd8c3c921f713 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Sat, 11 Apr 2026 01:19:07 +0300
Subject: [PATCH 019/100] test-build: pass ELASTIC8_URL (not OSF_DB_PASSWORD) to gen-report steps

---
 .github/workflows/test-build.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 6aa39e39800..60d056de001 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -67,7 +67,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
+        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
 
   website:
     runs-on: ubuntu-22.04
@@ -104,7 +104,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
+        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
 
   api1_and_js:
     runs-on: ubuntu-22.04
@@ -143,7 +143,7 @@ jobs:
         if: (success() || failure())    # run this step even if previous step failed
         uses: ./.github/actions/gen-report
         env:
-          ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
+          ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
 
   api2:
     runs-on: ubuntu-22.04
@@ -180,7 +180,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }}
+        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
 
   api3_and_osf:
     runs-on: ubuntu-22.04

From e6da70bbf73cbbf348fddf148d91b03472913e9e Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Sat, 11 Apr 2026 01:39:47 +0300
Subject: [PATCH 020/100] test-build fix naming

---
 .github/workflows/test-build.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 60d056de001..fdfd4c190b3 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -12,7 +12,7 @@ env:
   OSF_DB_PORT: 5432
   OSF_DB_PASSWORD: postgres
   GITHUB_ACTIONS: true
-  ELASTIC8_URL: http://localhost:9202
+  ELASTIC8_URI: http://localhost:9202
 
 jobs:
   build-cache:
@@ -67,7 +67,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
+        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   website:
     runs-on: ubuntu-22.04
@@ -104,7 +104,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
+        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api1_and_js:
     runs-on: ubuntu-22.04
@@ -143,7 +143,7 @@ jobs:
         if: (success() || failure())    # run this step even if previous step failed
         uses: ./.github/actions/gen-report
         env:
-          ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
+          ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api2:
     runs-on: ubuntu-22.04
@@ -180,7 +180,7 @@ jobs:
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
       env:
-        ELASTIC8_URL: ${{ env.ELASTIC8_URL }}
+        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api3_and_osf:
     runs-on: ubuntu-22.04

From 2b8a81c10b13e687c29144acf55b699d73ac3a2d Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Sat, 11 Apr 2026 08:55:02 +0300
Subject: [PATCH 021/100] update test

---
 osf_tests/metrics/test_es8_metrics.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 3d48a3d35c4..28dedd01eb4 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -15,11 +15,9 @@ def test_import_all_reports(self):
     def test_instantiate_of_reports(self):
         download_report = Es8DownloadCountReport()
         assert hasattr(download_report, 'daily_file_downloads')
-        assert download_report.daily_file_downloads is None
 
         user_report = Es8UserSummaryReport()
         assert hasattr(user_report, 'active')
-        assert user_report.active is None
 
     def test_nested_pageview(self):
         usage = OsfCountedUsageRecord(

From 6167778672af9f5d87ede22d31ce71b0863d09fc Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 13 Apr 2026 12:24:40 +0300
Subject: [PATCH 022/100] add wait

---
 .github/workflows/test-build.yml | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index fdfd4c190b3..844ca93fb15 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -61,13 +61,23 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
+    - name: Wait for Elasticsearch
+      run: |
+        echo "Waiting for Elasticsearch..."
+        for i in {1..30}; do
+          if curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; then
+            echo "Elasticsearch is ready"
+            exit 0
+          fi
+          sleep 2
+        done
+        echo "Elasticsearch failed"
+        exit 1
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-addons --junit
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
-      env:
-        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   website:
     runs-on: ubuntu-22.04
@@ -103,8 +113,6 @@ jobs:
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
-      env:
-        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api1_and_js:
     runs-on: ubuntu-22.04
@@ -142,8 +150,6 @@ jobs:
       - name: Upload report
         if: (success() || failure())    # run this step even if previous step failed
         uses: ./.github/actions/gen-report
-        env:
-          ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api2:
     runs-on: ubuntu-22.04
@@ -179,8 +185,6 @@ jobs:
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
-      env:
-        ELASTIC8_URI: ${{ env.ELASTIC8_URI }}
 
   api3_and_osf:
     runs-on: ubuntu-22.04

From eb0a5d9efe8cc6c683550acf99be5047b766e07e Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 13 Apr 2026 17:57:22 +0300
Subject: [PATCH 023/100] remove wait

---
 .github/workflows/test-build.yml | 12 ------------
 poetry.lock                      |  6 +++---
 pyproject.toml                   |  2 +-
 3 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 844ca93fb15..d6bf817b7c6 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -61,18 +61,6 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
-    - name: Wait for Elasticsearch
-      run: |
-        echo "Waiting for Elasticsearch..."
-        for i in {1..30}; do
-          if curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; then
-            echo "Elasticsearch is ready"
-            exit 0
-          fi
-          sleep 2
-        done
-        echo "Elasticsearch failed"
-        exit 1
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-addons --junit
     - name: Upload report
diff --git a/poetry.lock b/poetry.lock
index 5bbe2ae1f49..d524525f564 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
-resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
+reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"
+resolved_reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922"
+content-hash = "fd91980689d1fa7c440e0c81a0b0e9543445821350cb154f18c63f236c0898be"
diff --git a/pyproject.toml b/pyproject.toml
index 375b8cacd25..fb008eb2c41 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 78ed96fdc13cfb70d509d9e55f2028e9f789adf5 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Tue, 14 Apr 2026 12:50:11 +0300
Subject: [PATCH 024/100] cleanup

---
 .github/workflows/test-build.yml | 37 +++++++++++---------------------
 api/base/settings/defaults.py    |  1 -
 2 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index d6bf817b7c6..8d1d3ebb318 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -12,7 +12,6 @@ env:
   OSF_DB_PORT: 5432
   OSF_DB_PASSWORD: postgres
   GITHUB_ACTIONS: true
-  ELASTIC8_URI: http://localhost:9202
 
 jobs:
   build-cache:
@@ -61,8 +60,20 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
+    - name: Wait for Elasticsearch
+      run: |
+        echo "Waiting for ES8 health..."
+        sleep 5
+        until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
+          echo "ES8 not ready yet..."
+          sleep 5
+        done
+        
+        echo "ES8 started successfully!"
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-addons --junit
+      env:
+        ELASTIC8_URI: http://localhost:9202
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
@@ -73,14 +84,6 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -108,14 +111,6 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -145,14 +140,6 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index 42e8d9bd495..72e169c25a1 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -316,7 +316,6 @@
 HASHIDS_SALT = 'pinkhimalayan'
 
 # django-elasticsearch-metrics
-# DJELME_AUTOSETUP = True
 DJELME_BACKENDS = {
     'osfmetrics_es6': {
         'elasticsearch_metrics.imps.elastic6': {

From 70cf5e2442fb8d8c271a1f8ab7d1b8b63191c0d0 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Tue, 14 Apr 2026 13:13:36 +0300
Subject: [PATCH 025/100] add wait, downgrade djelme, flake8

---
 .github/workflows/test-build.yml | 20 +++++++++++++++++++-
 poetry.lock                      |  6 +++---
 pyproject.toml                   |  2 +-
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 8d1d3ebb318..09fbbb5b319 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -68,7 +68,6 @@ jobs:
           echo "ES8 not ready yet..."
           sleep 5
         done
-        
         echo "ES8 started successfully!"
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-addons --junit
@@ -111,6 +110,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -128,8 +135,19 @@ jobs:
       - uses: ./.github/actions/start-build
       - name: NVM & yarn install
         run: poetry run python3 -m invoke assets --dev
+      - name: Wait for Elasticsearch
+        run: |
+          echo "Waiting for ES8 health..."
+          sleep 5
+          until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
+            echo "ES8 not ready yet..."
+            sleep 5
+          done
+          echo "ES8 started successfully!"
       - name: Run test
         run: poetry run python3 -m invoke test-ci-api1-and-js --junit
+        env:
+          ELASTIC8_URI: http://localhost:9202
       - name: Upload report
         if: (success() || failure())    # run this step even if previous step failed
         uses: ./.github/actions/gen-report
diff --git a/poetry.lock b/poetry.lock
index d524525f564..5bbe2ae1f49 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"
-resolved_reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"
+reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
+resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "fd91980689d1fa7c440e0c81a0b0e9543445821350cb154f18c63f236c0898be"
+content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922"
diff --git a/pyproject.toml b/pyproject.toml
index fb008eb2c41..375b8cacd25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 3e35fee6522c4ad7e23da83a8915fde74455bebf Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Tue, 14 Apr 2026 16:09:27 +0300
Subject: [PATCH 026/100] add elastic8

---
 .github/workflows/test-build.yml      | 19 +++++++++++++++++++
 osf_tests/metrics/test_es8_metrics.py |  6 ++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 09fbbb5b319..0f2e101c408 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -158,6 +158,14 @@ jobs:
     permissions:
       checks: write
     services:
+      elasticsearch8:
+        image: elasticsearch:8.19.11
+        ports:
+          - 9202:9200
+        env:
+          xpack.security.enabled: false
+          node.name: singlenode
+          cluster.initial_master_nodes: singlenode
       postgres:
         image: postgres
         env:
@@ -173,8 +181,19 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
+    - name: Wait for Elasticsearch
+      run: |
+        echo "Waiting for ES8 health..."
+        sleep 5
+        until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
+          echo "ES8 not ready yet..."
+          sleep 5
+        done
+        echo "ES8 started successfully!"
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-api2 --junit
+      env:
+        ELASTIC8_URI: http://localhost:9202
     - name: Upload report
       if: (success() || failure())    # run this step even if previous step failed
       uses: ./.github/actions/gen-report
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 28dedd01eb4..07705825f86 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -13,14 +13,15 @@ def test_import_all_reports(self):
         assert True
 
     def test_instantiate_of_reports(self):
-        download_report = Es8DownloadCountReport()
+        download_report = Es8DownloadCountReport(cycle_coverage='2026-01-01')
         assert hasattr(download_report, 'daily_file_downloads')
 
-        user_report = Es8UserSummaryReport()
+        user_report = Es8UserSummaryReport(cycle_coverage='2026-01-01')
         assert hasattr(user_report, 'active')
 
     def test_nested_pageview(self):
         usage = OsfCountedUsageRecord(
+            cycle_coverage='2026-01-01',
             pageview_info={
                 'page_url': 'https://example.com',
                 'referer_url': 'https://google.com',
@@ -30,6 +31,7 @@ def test_nested_pageview(self):
 
     def test_pageview_info_autofill(self):
         obj = PageviewInfo(
+            cycle_coverage='2026-01-01',
             page_url='https://example.com/path/test',
             referer_url='https://google.com',
             timestamp=datetime(2024, 1, 1, 15, 0),

From a2363420c43c40a55b69630a096fb549cc49a71a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Tue, 14 Apr 2026 16:46:59 +0300
Subject: [PATCH 027/100] fix test

---
 osf_tests/metrics/test_es8_metrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 07705825f86..d6b3d4c4434 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -13,15 +13,15 @@ def test_import_all_reports(self):
         assert True
 
     def test_instantiate_of_reports(self):
-        download_report = Es8DownloadCountReport(cycle_coverage='2026-01-01')
+        download_report = Es8DownloadCountReport(cycle_coverage='2026.01.01')
         assert hasattr(download_report, 'daily_file_downloads')
 
-        user_report = Es8UserSummaryReport(cycle_coverage='2026-01-01')
+        user_report = Es8UserSummaryReport(cycle_coverage='2026.01.01')
         assert hasattr(user_report, 'active')
 
     def test_nested_pageview(self):
         usage = OsfCountedUsageRecord(
-            cycle_coverage='2026-01-01',
+            cycle_coverage='2026.01.01',
             pageview_info={
                 'page_url': 'https://example.com',
                 'referer_url': 'https://google.com',
@@ -31,7 +31,7 @@ def test_nested_pageview(self):
 
     def test_pageview_info_autofill(self):
         obj = PageviewInfo(
-            cycle_coverage='2026-01-01',
+            cycle_coverage='2026.01.01',
             page_url='https://example.com/path/test',
             referer_url='https://google.com',
             timestamp=datetime(2024, 1, 1, 15, 0),

From 00b055b5a13db955fdf1eab1a558f2bc5b64f33f Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 12:36:35 -0400
Subject: [PATCH 028/100] timedepth constants

---
 osf/metrics/es8_metrics.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 020a9c72c80..85c9141fba8 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -1,8 +1,10 @@
 import datetime
 import enum
+from urllib.parse import urlsplit
+
 import elasticsearch8.dsl as esdsl
+from elasticsearch_metrics import DAILY, MONTHLY
 import elasticsearch_metrics.imps.elastic8 as djelme
-from urllib.parse import urlsplit
 
 from osf.metrics.utils import YearMonth
 
@@ -183,19 +185,19 @@ class UsageByStorageAddon(esdsl.InnerDoc):
 
 
 class Es8StorageAddonUsage(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     usage_by_addon: list[UsageByStorageAddon]
 
 
 class Es8DownloadCountReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     daily_file_downloads: int
 
 
 class Es8InstitutionSummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',)
 
     institution_id: str
@@ -208,7 +210,7 @@ class Es8InstitutionSummaryReport(djelme.CyclicRecord):
 
 
 class Es8NewUserDomainReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',)
 
     domain_name: str
@@ -216,7 +218,7 @@ class Es8NewUserDomainReport(djelme.CyclicRecord):
 
 
 class Es8NodeSummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     nodes: NodeRunningTotals
     projects: NodeRunningTotals
@@ -225,13 +227,13 @@ class Es8NodeSummaryReport(djelme.CyclicRecord):
 
 
 class Es8OsfstorageFileCountReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     files: FileRunningTotals
 
 
 class Es8PreprintSummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',)
     provider_key: str
@@ -239,7 +241,7 @@ class Es8PreprintSummaryReport(djelme.CyclicRecord):
 
 
 class Es8UserSummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 3
+    CYCLE_TIMEDEPTH = DAILY
 
     active: int
     deactivated: int
@@ -250,7 +252,7 @@ class Es8UserSummaryReport(djelme.CyclicRecord):
 
 
 class Es8SpamSummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 2
+    CYCLE_TIMEDEPTH = MONTHLY
 
     node_confirmed_spam: int
     node_confirmed_ham: int
@@ -266,7 +268,7 @@ class Es8SpamSummaryReport(djelme.CyclicRecord):
 
 
 class Es8InstitutionalUserReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 2
+    CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',)
 
     institution_id: str
@@ -289,7 +291,7 @@ class Es8InstitutionalUserReport(djelme.CyclicRecord):
 
 
 class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 2
+    CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', )
 
     institution_id: str
@@ -306,7 +308,7 @@ class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord):
 
 
 class Es8PublicItemUsageReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 2
+    CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid')
 
     # where noted, fields are meant to correspond to defined terms from COUNTER
@@ -331,7 +333,7 @@ class Es8PublicItemUsageReport(djelme.CyclicRecord):
 
 
 class Es8PrivateSpamMetricsReport(djelme.CyclicRecord):
-    CYCLE_TIMEDEPTH = 2
+    CYCLE_TIMEDEPTH = MONTHLY
 
     node_oopspam_flagged: int
     node_oopspam_hammed: int

From dddc94e791d7de76f487d1a00ac767848279ce87 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 12:36:14 -0400
Subject: [PATCH 029/100] tidy gh actions with yaml anchors, health checks

---
 .github/workflows/test-build.yml | 131 ++++---------------------------
 1 file changed, 17 insertions(+), 114 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 0f2e101c408..3433e689a42 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -37,15 +37,19 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
+      elasticsearch8: &ES8_SERVICE
+        image: elasticsearch:8.19.14
         ports:
           - 9202:9200
         env:
+          discovery.type: single-node
           xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
-      postgres:
+        options: >-
+          --health-cmd "curl -sf http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"
+          --health-interval 10s
+          --health-timeout 30s
+          --health-retries 5
+      postgres: &POSTGRES_SERVICE
         image: postgres
         env:
           POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
@@ -60,15 +64,6 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
-    - name: Wait for Elasticsearch
-      run: |
-        echo "Waiting for ES8 health..."
-        sleep 5
-        until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
-          echo "ES8 not ready yet..."
-          sleep 5
-        done
-        echo "ES8 started successfully!"
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-addons --junit
       env:
@@ -83,18 +78,7 @@ jobs:
     permissions:
       checks: write
     services:
-      postgres:
-        image: postgres
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      postgres: *POSTGRES_SERVICE
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
@@ -110,40 +94,13 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
-      postgres:
-        image: postgres
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      elasticsearch8: *ES8_SERVICE
+      postgres: *POSTGRES_SERVICE
     steps:
       - uses: actions/checkout@v2
       - uses: ./.github/actions/start-build
       - name: NVM & yarn install
         run: poetry run python3 -m invoke assets --dev
-      - name: Wait for Elasticsearch
-        run: |
-          echo "Waiting for ES8 health..."
-          sleep 5
-          until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
-            echo "ES8 not ready yet..."
-            sleep 5
-          done
-          echo "ES8 started successfully!"
       - name: Run test
         run: poetry run python3 -m invoke test-ci-api1-and-js --junit
         env:
@@ -158,26 +115,8 @@ jobs:
     permissions:
       checks: write
     services:
-      elasticsearch8:
-        image: elasticsearch:8.19.11
-        ports:
-          - 9202:9200
-        env:
-          xpack.security.enabled: false
-          node.name: singlenode
-          cluster.initial_master_nodes: singlenode
-      postgres:
-        image: postgres
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      elasticsearch8: *ES8_SERVICE
+      postgres: *POSTGRES_SERVICE
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
@@ -204,19 +143,7 @@ jobs:
       checks: write
     needs: build-cache
     services:
-      postgres:
-        image: postgres
-
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      postgres: *POSTGRES_SERVICE
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
@@ -232,19 +159,7 @@ jobs:
       checks: write
     needs: build-cache
     services:
-      postgres:
-        image: postgres
-
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      postgres: *POSTGRES_SERVICE
       mailhog:
         image: mailhog/mailhog
         ports:
@@ -265,19 +180,7 @@ jobs:
       checks: write
     needs: build-cache
     services:
-      postgres:
-        image: postgres
-
-        env:
-          POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }}
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          # Maps tcp port 5432 on service container to the host
-          - 5432:5432
+      postgres: *POSTGRES_SERVICE
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build

From 46a934f901b1c685aa33b9a34ff204b370abcdf5 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 13:59:10 -0400
Subject: [PATCH 030/100] simplify local elasticsearch8 config

---
 docker-compose.yml | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 09aedd58247..83e8fd27483 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -79,26 +79,22 @@ services:
     stdin_open: true
 
   elasticsearch8:
-    image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11
-    platform: linux/arm64
+    image: elasticsearch:8.19.14
     environment:
-      - ELASTIC_PASSWORD=secretsecret
-      - node.name=singlenode
       - discovery.type=single-node
-      - xpack.security.enabled=true
-      - xpack.security.http.ssl.enabled=true
-      - xpack.security.http.ssl.key=/elastic8_certs/singlenode/singlenode.key
-      - xpack.security.http.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt
-      - xpack.security.http.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt
-      - xpack.security.transport.ssl.enabled=true
-      - xpack.security.transport.ssl.key=/elastic8_certs/singlenode/singlenode.key
-      - xpack.security.transport.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt
-      - xpack.security.transport.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt
-      - xpack.security.transport.ssl.verification_mode=certificate
+      - xpack.security.enabled=false
+      - ES_JAVA_OPTS=-Xms512m -Xmx512m  # reduce memory usage
+      - xpack.ml.enabled=false
     ports:
       - 9202:9200
     volumes:
       - elasticsearch8_data_vol:/usr/share/elasticsearch/data
+    healthcheck:
+      start_period: 15s
+      test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"]
+      interval: 10s
+      timeout: 30s
+      retries: 5
     stdin_open: true
 
   postgres:

From 49f925945a2ea913dd56755fb9ac1d9efb905eb4 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 15:57:53 -0400
Subject: [PATCH 031/100] bump djelme to get fixes

---
 poetry.lock    | 6 +++---
 pyproject.toml | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 5bbe2ae1f49..90665bce81f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
-resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5"
+reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"
+resolved_reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922"
+content-hash = "ef1d6d327f5557e43482793b276ccb6c5fd07989f27367af3a3736a8547b4d1a"
diff --git a/pyproject.toml b/pyproject.toml
index 375b8cacd25..013df3f448d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 29839b975f440d1bbbe962d7e1ee0fce813e16c5 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:03:45 -0400
Subject: [PATCH 032/100] tests passing with djelme es8

- use elasticsearch_metrics.tests.util
- move "Es8" prefix to suffix
- autofill fields on `save`, not `__init__`
  (to work with how esdsl loads search results)
---
 conftest.py                           | 22 +++-----
 osf/metrics/__init__.py               | 18 +------
 osf/metrics/es8_metrics.py            | 57 +++++++++++----------
 osf/metrics/reports.py                |  2 +
 osf_tests/metrics/test_es8_metrics.py | 74 +++++++++++++++------------
 5 files changed, 82 insertions(+), 91 deletions(-)

diff --git a/conftest.py b/conftest.py
index 232b788c0fb..6eafa1b7a55 100644
--- a/conftest.py
+++ b/conftest.py
@@ -6,7 +6,7 @@
 from django.db import transaction
 from elasticsearch6_dsl.connections import connections
 from website import settings as osf_settings
-from elasticsearch_metrics.tests._test_util import RealElasticTestCase
+from elasticsearch_metrics.tests.util import djelme_test_backends
 from faker import Factory
 import pytest
 import responses
@@ -146,19 +146,9 @@ def _es_metrics_marker(request):
         yield
         return
 
-    connections.create_connection(
-        alias='osfmetrics_es6',
-        hosts=osf_settings.ELASTIC6_URI,
-    )
-
-    class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True):
-        ...
-    es6_test_case = _Es6TestCase()
-    es6_test_case.setUp()
-    try:
+    with djelme_test_backends():
         yield
-    finally:
-        es6_test_case.tearDown()
+
 
 @pytest.fixture
 def mock_share_responses():
@@ -356,6 +346,6 @@ def mock_gravy_valet_get_verified_links():
         yield mock_get_verified_links
 
 
-@pytest.fixture(autouse=True)
-def load_notification_types(db, *args, **kwargs):
-    populate_notification_types(*args, **kwargs)
+# @pytest.fixture(autouse=True)
+# def load_notification_types(db, *args, **kwargs):
+#     populate_notification_types(*args, **kwargs)
diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py
index 6cef14f5cf9..6056e6d92f3 100644
--- a/osf/metrics/__init__.py
+++ b/osf/metrics/__init__.py
@@ -17,16 +17,8 @@
     StorageAddonUsage,
     UserSummaryReport,
 )
+from . import es8_metrics
 
-from .es8_metrics import (
-    Es8DownloadCountReport,
-    Es8UserSummaryReport,
-    Es8NodeSummaryReport,
-    Es8InstitutionSummaryReport,
-    Es8NewUserDomainReport,
-    Es8OsfstorageFileCountReport,
-    Es8StorageAddonUsage,
-)
 
 DAILY_REPORTS = (
     DownloadCountReport,
@@ -37,13 +29,6 @@
     PreprintSummaryReport,
     StorageAddonUsage,
     UserSummaryReport,
-    Es8DownloadCountReport,
-    Es8InstitutionSummaryReport,
-    Es8NewUserDomainReport,
-    Es8NodeSummaryReport,
-    Es8OsfstorageFileCountReport,
-    Es8StorageAddonUsage,
-    Es8UserSummaryReport
 )
 
 
@@ -53,4 +38,5 @@
     'PreprintView',
     'PreprintDownload',
     'RegistriesModerationMetrics',
+    'es8_metrics',
 )
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 85c9141fba8..436a1c62d46 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -56,18 +56,6 @@ class PageviewInfo(esdsl.InnerDoc):
     for CountedAuthUsage generated by viewing a web page
     """
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.page_path: str = ''
-        if self.page_url:
-            self.page_path = urlsplit(self.page_url).path.rstrip('/')
-        self.referer_domain: str = ''
-        if self.referer_url:
-            self.referer_domain = urlsplit(self.referer_url).netloc
-        self.hour_of_day: int = 0
-        if self.timestamp:
-            self.hour_of_day = self.timestamp.hour
-
     # fields that should be provided
     referer_url: str
     page_url: str
@@ -78,10 +66,12 @@ def __init__(self, *args, **kwargs):
         },
     ))
 
+    # fields auto-filled
     page_path: str
     referer_domain: str
     hour_of_day: int
 
+
 ###
 # Event records
 
@@ -103,6 +93,19 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     action_labels: list[str]
     pageview_info: PageviewInfo
 
+    def save(self, *args, **kwargs):
+        # autofill pageview_info fields
+        if self.pageview_info:
+            self.pageview_info.hour_of_day = self.timestamp.hour
+            _url = self.pageview_info.page_url
+            if _url:
+                self.pageview_info.page_path = urlsplit(_url).path.rstrip('/')
+            _ref_url = self.pageview_info.referer_url
+            if _ref_url:
+                self.pageview_info.referer_domain = urlsplit(_ref_url).netloc
+        super().save(*args, **kwargs)
+
+
 class ActionLabel(enum.Enum):
     SEARCH = 'search'  # counter:Search
     VIEW = 'view'  # counter:Investigation
@@ -111,7 +114,7 @@ class ActionLabel(enum.Enum):
     API = 'api'  # counter:TDM (aka "non-web api usage")
 
 
-class Es8RegistriesModerationMetrics(djelme.EventRecord):
+class RegistriesModerationMetricsEs8(djelme.EventRecord):
     registration_id: str
     provider_id: str
     trigger: str
@@ -184,19 +187,19 @@ class UsageByStorageAddon(esdsl.InnerDoc):
 # Cyclic reports
 
 
-class Es8StorageAddonUsage(djelme.CyclicRecord):
+class StorageAddonUsageEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     usage_by_addon: list[UsageByStorageAddon]
 
 
-class Es8DownloadCountReport(djelme.CyclicRecord):
+class DownloadCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     daily_file_downloads: int
 
 
-class Es8InstitutionSummaryReport(djelme.CyclicRecord):
+class InstitutionSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',)
 
@@ -209,15 +212,15 @@ class Es8InstitutionSummaryReport(djelme.CyclicRecord):
     registered_projects: RegistrationRunningTotals
 
 
-class Es8NewUserDomainReport(djelme.CyclicRecord):
+class NewUserDomainReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',)
 
     domain_name: str
-    domain_name: int
+    new_user_count: int
 
 
-class Es8NodeSummaryReport(djelme.CyclicRecord):
+class NodeSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     nodes: NodeRunningTotals
@@ -226,13 +229,13 @@ class Es8NodeSummaryReport(djelme.CyclicRecord):
     registered_projects: RegistrationRunningTotals
 
 
-class Es8OsfstorageFileCountReport(djelme.CyclicRecord):
+class OsfstorageFileCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     files: FileRunningTotals
 
 
-class Es8PreprintSummaryReport(djelme.CyclicRecord):
+class PreprintSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',)
@@ -240,7 +243,7 @@ class Es8PreprintSummaryReport(djelme.CyclicRecord):
     preprint_count: int
 
 
-class Es8UserSummaryReport(djelme.CyclicRecord):
+class UserSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     active: int
@@ -251,7 +254,7 @@ class Es8UserSummaryReport(djelme.CyclicRecord):
     unconfirmed: int
 
 
-class Es8SpamSummaryReport(djelme.CyclicRecord):
+class SpamSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
 
     node_confirmed_spam: int
@@ -267,7 +270,7 @@ class Es8SpamSummaryReport(djelme.CyclicRecord):
     user_marked_as_ham: int
 
 
-class Es8InstitutionalUserReport(djelme.CyclicRecord):
+class InstitutionalUserReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',)
 
@@ -290,7 +293,7 @@ class Es8InstitutionalUserReport(djelme.CyclicRecord):
     storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord):
+class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', )
 
@@ -307,7 +310,7 @@ class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord):
     monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class Es8PublicItemUsageReport(djelme.CyclicRecord):
+class PublicItemUsageReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid')
 
@@ -332,7 +335,7 @@ class Es8PublicItemUsageReport(djelme.CyclicRecord):
     cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long())
 
 
-class Es8PrivateSpamMetricsReport(djelme.CyclicRecord):
+class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
 
     node_oopspam_flagged: int
diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py
index 9d71ea7e8c2..62479e359cd 100644
--- a/osf/metrics/reports.py
+++ b/osf/metrics/reports.py
@@ -120,6 +120,8 @@ def save(self, *args, **kwargs):
 
 @receiver(metrics_pre_save)
 def set_report_id(sender, instance, **kwargs):
+    if not issubclass(sender, metrics.Metric):
+        return  # skip es8 record types
     try:
         _unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS
     except AttributeError:
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index d6b3d4c4434..68d767fca89 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -1,42 +1,52 @@
 from datetime import datetime
 
+from elasticsearch_metrics.tests.util import djelme_test_backends
+import pytest
+
 from osf.metrics.es8_metrics import (
-    Es8DownloadCountReport,
-    Es8UserSummaryReport,
+    PageviewInfo,
+    DownloadCountReportEs8,
     OsfCountedUsageRecord,
-    PageviewInfo
 )
 
 
 class TestEs8Metrics:
-    def test_import_all_reports(self):
-        assert True
-
-    def test_instantiate_of_reports(self):
-        download_report = Es8DownloadCountReport(cycle_coverage='2026.01.01')
-        assert hasattr(download_report, 'daily_file_downloads')
-
-        user_report = Es8UserSummaryReport(cycle_coverage='2026.01.01')
-        assert hasattr(user_report, 'active')
-
-    def test_nested_pageview(self):
-        usage = OsfCountedUsageRecord(
-            cycle_coverage='2026.01.01',
-            pageview_info={
-                'page_url': 'https://example.com',
-                'referer_url': 'https://google.com',
-            }
-        )
-        assert usage.pageview_info is not None
-
-    def test_pageview_info_autofill(self):
-        obj = PageviewInfo(
-            cycle_coverage='2026.01.01',
-            page_url='https://example.com/path/test',
-            referer_url='https://google.com',
+    """smoke tests to check that djelme records can be saved and searched"""
+    @pytest.fixture(autouse=True)
+    def _real_elastic(self):
+        with djelme_test_backends():
+            yield
+
+    def test_nested_pageview_autofill(self):
+        usage = OsfCountedUsageRecord.record(
             timestamp=datetime(2024, 1, 1, 15, 0),
+            sessionhour_id='blah',
+            database_iri='https://osf.example/provider',
+            item_iri='https://osf.example/itemm',
+            item_osfid='itemm',
+            item_public=True,
+            item_type='https://osf.example/Preprint',
+            platform_iri='https://osf.example',
+            user_is_authenticated=False,
+            pageview_info=PageviewInfo(
+                page_url="https://example.com/path/test",
+                referer_url="https://google.com",
+                route_name='foo.bar',
+                page_title='title title',
+            ),
         )
-
-        assert obj.page_path == '/path/test'
-        assert obj.referer_domain == 'google.com'
-        assert obj.hour_of_day == 15
+        assert usage.pageview_info.page_path == "/path/test"
+        assert usage.pageview_info.referer_domain == "google.com"
+        assert usage.pageview_info.hour_of_day == 15
+
+    def test_save_report(self):
+        _saved = DownloadCountReportEs8.record(
+            cycle_coverage="2026.1.1",
+            daily_file_downloads=17,
+        )
+        DownloadCountReportEs8.refresh_timeseries_indexes()
+        _response = DownloadCountReportEs8.search().execute()
+        (_fetched,) = _response
+        assert _fetched.meta.id == _saved.meta.id
+        assert _fetched.cycle_coverage == '2026.1.1'
+        assert _fetched.daily_file_downloads == 17

From 619cac7cca77df36e2d04f37dd55a060d36e4f75 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:12:45 -0400
Subject: [PATCH 033/100] fix(test): patch check_index_template

---
 osf_tests/metrics/test_daily_report.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py
index 9301cdb114f..5228e2342c5 100644
--- a/osf_tests/metrics/test_daily_report.py
+++ b/osf_tests/metrics/test_daily_report.py
@@ -10,8 +10,9 @@
 class TestDailyReportKey:
     @pytest.fixture
     def mock_save(self):
-        with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
-            yield mock_save
+        with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'):
+            with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
+                yield mock_save
 
     def test_default(self, mock_save):
         # only one of this type of report per day

From 8cec095a5b604a9f97abd4297af96774c7e585ac Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:20:45 -0400
Subject: [PATCH 034/100] uncomment autouse fixture

---
 conftest.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conftest.py b/conftest.py
index 6eafa1b7a55..7adf6bdeba6 100644
--- a/conftest.py
+++ b/conftest.py
@@ -346,6 +346,6 @@ def mock_gravy_valet_get_verified_links():
         yield mock_get_verified_links
 
 
-# @pytest.fixture(autouse=True)
-# def load_notification_types(db, *args, **kwargs):
-#     populate_notification_types(*args, **kwargs)
+@pytest.fixture(autouse=True)
+def load_notification_types(db, *args, **kwargs):
+    populate_notification_types(*args, **kwargs)

From c24430fff7b2fdca860be0bb216bad20108a67ab Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:25:56 -0400
Subject: [PATCH 035/100] remove unnecessary loop

---
 .github/workflows/test-build.yml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index 3433e689a42..011b621cca9 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -120,15 +120,6 @@ jobs:
     steps:
     - uses: actions/checkout@v2
     - uses: ./.github/actions/start-build
-    - name: Wait for Elasticsearch
-      run: |
-        echo "Waiting for ES8 health..."
-        sleep 5
-        until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do
-          echo "ES8 not ready yet..."
-          sleep 5
-        done
-        echo "ES8 started successfully!"
     - name: Run tests
       run: poetry run python3 -m invoke test-ci-api2 --junit
       env:

From cd3282786f5fc2c715f8bd0387903e6ba2d44d9a Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:28:40 -0400
Subject: [PATCH 036/100] plac8 flake8

---
 conftest.py                           |  1 -
 osf_tests/metrics/test_es8_metrics.py | 11 ++++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/conftest.py b/conftest.py
index 7adf6bdeba6..9d8861a1e97 100644
--- a/conftest.py
+++ b/conftest.py
@@ -5,7 +5,6 @@
 
 from django.db import transaction
 from elasticsearch6_dsl.connections import connections
-from website import settings as osf_settings
 from elasticsearch_metrics.tests.util import djelme_test_backends
 from faker import Factory
 import pytest
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index 68d767fca89..e93579628dc 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -12,6 +12,7 @@
 
 class TestEs8Metrics:
     """smoke tests to check that djelme records can be saved and searched"""
+
     @pytest.fixture(autouse=True)
     def _real_elastic(self):
         with djelme_test_backends():
@@ -29,19 +30,19 @@ def test_nested_pageview_autofill(self):
             platform_iri='https://osf.example',
             user_is_authenticated=False,
             pageview_info=PageviewInfo(
-                page_url="https://example.com/path/test",
-                referer_url="https://google.com",
+                page_url='https://example.com/path/test',
+                referer_url='https://google.com',
                 route_name='foo.bar',
                 page_title='title title',
             ),
         )
-        assert usage.pageview_info.page_path == "/path/test"
-        assert usage.pageview_info.referer_domain == "google.com"
+        assert usage.pageview_info.page_path == '/path/test'
+        assert usage.pageview_info.referer_domain == 'google.com'
         assert usage.pageview_info.hour_of_day == 15
 
     def test_save_report(self):
         _saved = DownloadCountReportEs8.record(
-            cycle_coverage="2026.1.1",
+            cycle_coverage='2026.1.1',
             daily_file_downloads=17,
         )
         DownloadCountReportEs8.refresh_timeseries_indexes()

From db938be047d4df29e2d01ee18b923f9c681eaa35 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 17:47:24 -0400
Subject: [PATCH 037/100] remove unused local env vars

---
 .docker-compose.env | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.docker-compose.env b/.docker-compose.env
index 2542d16e841..80eebc8707b 100644
--- a/.docker-compose.env
+++ b/.docker-compose.env
@@ -8,9 +8,7 @@ API_DOMAIN=http://localhost:8000/
 ELASTIC_URI=192.168.168.167:9200
 ELASTIC6_URI=192.168.168.167:9201
 ELASTIC8_URI=http://192.168.168.167:9202
-ELASTIC8_CERT_PATH=/elastic8_certs/ca/ca.crt
 ELASTIC8_USERNAME=elastic
-ELASTIC8_SECRET=secretsecret
 OSF_DB_HOST=192.168.168.167
 DB_HOST=192.168.168.167
 REDIS_HOST=redis://192.168.168.167:6379

From 52a2bc94935057d874e9fc3cdf28b6f5d0e9e684 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 18:16:12 -0400
Subject: [PATCH 038/100] better use waffle switch ELASTICSEARCH_METRICS

---
 .../test_registries_moderation_metrics.py      | 12 ------------
 conftest.py                                    | 18 ++++++++++++------
 osf/models/registrations.py                    |  5 ++++-
 osf_tests/metrics/test_monthly_report.py       |  5 +++--
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py
index 0f3dddb79b6..f5d3a047b10 100644
--- a/api_tests/metrics/test_registries_moderation_metrics.py
+++ b/api_tests/metrics/test_registries_moderation_metrics.py
@@ -1,7 +1,5 @@
 import pytest
-from waffle.testutils import override_switch
 
-from osf import features
 from osf_tests.factories import RegistrationFactory, AuthUserFactory
 from osf.utils.workflows import RegistrationModerationStates, RegistrationModerationTriggers
 from osf.metrics import RegistriesModerationMetrics
@@ -17,11 +15,6 @@ class TestRegistrationModerationMetrics:
     def registration(self):
         return RegistrationFactory()
 
-    @pytest.fixture(autouse=True)
-    def enable_elasticsearch_metrics(self):
-        with override_switch(features.ELASTICSEARCH_METRICS, active=True):
-            yield
-
     @pytest.mark.es_metrics
     def test_record_transitions(self, registration):
         with capture_notifications():
@@ -50,11 +43,6 @@ class TestRegistrationModerationMetricsView:
     def registration(self):
         return RegistrationFactory()
 
-    @pytest.fixture(autouse=True)
-    def enable_elasticsearch_metrics(self):
-        with override_switch(features.ELASTICSEARCH_METRICS, active=True):
-            yield
-
     @pytest.fixture
     def user(self):
         user = AuthUserFactory()
diff --git a/conftest.py b/conftest.py
index 9d8861a1e97..0c944957661 100644
--- a/conftest.py
+++ b/conftest.py
@@ -10,12 +10,15 @@
 import pytest
 import responses
 import xml.etree.ElementTree as ET
+from waffle.testutils import override_switch
 
 from api_tests.share import _utils as shtrove_test_utils
 from framework.celery_tasks import app as celery_app
 from osf.external.spam import tasks as spam_tasks
 from website import settings as website_settings
 from osf.management.commands.populate_notification_types import populate_notification_types
+from osf import features
+
 
 def pytest_configure(config):
     if not os.getenv('GITHUB_ACTIONS') == 'true':
@@ -141,12 +144,15 @@ def _es_metrics_marker(request):
     """
     marker = request.node.get_closest_marker('es_metrics')
 
-    if not marker:
-        yield
-        return
-
-    with djelme_test_backends():
-        yield
+    if marker:
+        with (
+            override_switch(features.ELASTICSEARCH_METRICS, active=True),
+            djelme_test_backends(),
+        ):
+            yield
+    else:
+        with override_switch(features.ELASTICSEARCH_METRICS, active=False):
+            yield
 
 
 @pytest.fixture
diff --git a/osf/models/registrations.py b/osf/models/registrations.py
index e1d819b43bf..e9114355649 100644
--- a/osf/models/registrations.py
+++ b/osf/models/registrations.py
@@ -14,9 +14,11 @@
     UserObjectPermissionBase,
 )
 from dirtyfields import DirtyFieldsMixin
+import waffle
 
 from framework.auth import Auth
 from framework.exceptions import PermissionsError
+from osf import features
 from osf.models import Identifier
 from osf.utils.fields import NonNaiveDateTimeField, LowercaseCharField
 from osf.utils.permissions import ADMIN, READ, WRITE
@@ -782,7 +784,8 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment
             comment=comment
         )
         action.save()
-        RegistriesModerationMetrics.record_transitions(action)
+        if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
+            RegistriesModerationMetrics.record_transitions(action)
 
         moderation_notifications = {
             RegistrationModerationTriggers.SUBMIT: notify.notify_submit,
diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py
index cc8c4137cb2..9d0980cd5b8 100644
--- a/osf_tests/metrics/test_monthly_report.py
+++ b/osf_tests/metrics/test_monthly_report.py
@@ -11,8 +11,9 @@
 class TestMonthlyReportKey:
     @pytest.fixture
     def mock_save(self):
-        with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
-            yield mock_save
+        with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'):
+            with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
+                yield mock_save
 
     def test_default(self, mock_save):
         # only one of this type of report per month

From 82de65b8ed8c2eb20e30fcb09eb139e40e7cbcd9 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 18:18:46 -0400
Subject: [PATCH 039/100] fix(test): patch check_index_template in mock_save fixture

---
 api_tests/metrics/test_counted_usage.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py
index 568d663be9e..e2cb7040037 100644
--- a/api_tests/metrics/test_counted_usage.py
+++ b/api_tests/metrics/test_counted_usage.py
@@ -38,8 +38,9 @@ def assert_saved_with(mock_save, *, expected_doc_id=None, expected_attrs):
 
 @pytest.fixture
 def mock_save():
-    with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
-        yield mock_save
+    with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'):
+        with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save:
+            yield mock_save
 
 
 @pytest.mark.django_db

From b33280df27eee0082eebcf9f037b9eea62e0df07 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 15 Apr 2026 15:57:03 +0300
Subject: [PATCH 040/100] remove the override

---
 conftest.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/conftest.py b/conftest.py
index 0c944957661..198316f1cc4 100644
--- a/conftest.py
+++ b/conftest.py
@@ -144,15 +144,15 @@ def _es_metrics_marker(request):
     """
     marker = request.node.get_closest_marker('es_metrics')
 
-    if marker:
-        with (
-            override_switch(features.ELASTICSEARCH_METRICS, active=True),
-            djelme_test_backends(),
-        ):
-            yield
-    else:
-        with override_switch(features.ELASTICSEARCH_METRICS, active=False):
-            yield
+    if not marker:
+        yield
+        return
+
+    with (
+        override_switch(features.ELASTICSEARCH_METRICS, active=True),
+        djelme_test_backends(),
+    ):
+        yield
 
 
 @pytest.fixture

From 1cef7d335c8a00677f6e37ddb975bd14619e02d6 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 15 Apr 2026 16:55:34 +0300
Subject: [PATCH 041/100] fix failing test

---
 osf_tests/metrics/test_monthly_report.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py
index 9d0980cd5b8..ba981e997d6 100644
--- a/osf_tests/metrics/test_monthly_report.py
+++ b/osf_tests/metrics/test_monthly_report.py
@@ -80,6 +80,7 @@ class Meta:
 
 
 @pytest.mark.es_metrics
+@pytest.mark.django_db
 class TestLastMonthReport:
     @pytest.fixture
     def osfid(self):

From 029647f4c70581bbd4a47fc1dbf266a020fccb00 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 9 Apr 2026 09:40:37 -0400
Subject: [PATCH 042/100] add background_migration queue (in the osf way)

---
 framework/celery_tasks/routers.py |  2 ++
 website/settings/defaults.py      | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/framework/celery_tasks/routers.py b/framework/celery_tasks/routers.py
index c33238780e8..d9d6e335286 100644
--- a/framework/celery_tasks/routers.py
+++ b/framework/celery_tasks/routers.py
@@ -11,6 +11,8 @@ def match_by_module(task_path):
             return CeleryConfig.task_med_queue
         if task_subpath in CeleryConfig.high_pri_modules:
             return CeleryConfig.task_high_queue
+        if task_subpath in CeleryConfig.background_migration_modules:
+            return CeleryConfig.task_background_migration_queue
         if task_subpath in CeleryConfig.remote_computing_modules:
             return CeleryConfig.task_remote_computing_queue
         if task_subpath in CeleryConfig.account_status_changes_modules:
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index 1e8032cc95c..3053f9d1075 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -421,6 +421,7 @@ class CeleryConfig:
     task_account_status_changes_queue = 'account_status_changes'
     task_external_high_queue = 'external_high'
     task_external_low_queue = 'external_low'
+    task_background_migration_queue = 'background_migration'
 
     external_high_modules = {
         'osf.tasks.log_gv_addon',
@@ -487,6 +488,10 @@ class CeleryConfig:
         'api.share.utils',
     }
 
+    background_migration_modules = {
+        'osf.management.commands.metrics_es8_migration',
+    }
+
     try:
         from kombu import Queue, Exchange
     except ImportError:
@@ -540,6 +545,12 @@ class CeleryConfig:
                 routing_key=task_external_low_queue,
                 consumer_arguments={'x-priority': -2},
             ),
+            Queue(
+                task_background_migration_queue,
+                Exchange(task_background_migration_queue),
+                routing_key=task_background_migration_queue,
+                consumer_arguments={'x-priority': -1},
+            ),
         )
 
         task_default_exchange_type = 'direct'

From ac397e8c509df085257ba214fa621fc5b61e8c13 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 14 Apr 2026 11:30:17 -0400
Subject: [PATCH 043/100] wip

---
 .../commands/metrics_es8_migration.py         | 184 ++++++++++++++++++
 osf/management/commands/sync_databases.py     |   2 +-
 2 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 osf/management/commands/metrics_es8_migration.py

diff --git a/osf/management/commands/metrics_es8_migration.py b/osf/management/commands/metrics_es8_migration.py
new file mode 100644
index 00000000000..46b187c63bf
--- /dev/null
+++ b/osf/management/commands/metrics_es8_migration.py
@@ -0,0 +1,184 @@
+import datetime
+import logging
+
+
+from django.core.management import call_command
+from django.core.management.base import BaseCommand
+from elasticsearch6 import helpers as es6_helpers
+from elasticsearch8 import helpers as es8_helpers
+from elasticsearch_metrics.registry import djelme_registry
+from elasticsearch_metrics.imps import elastic8 as djel8me
+from elasticsearch_metrics.util.timeparts import format_timeparts
+
+from framework.celery_tasks import app as celery_app
+from osf.metrics import reports as es6_reports
+from osf.metrics import es8_metrics, RegistriesModerationMetrics
+
+
+_logger = logging.getLogger(__name__)
+
+_UNCHANGED_RECORDTYPES = {
+    # reports
+    es6_reports.StorageAddonUsage: es8_metrics.StorageAddonUsageEs8,
+    es6_reports.DownloadCountReport: es8_metrics.DownloadCountReportEs8,
+    es6_reports.InstitutionSummaryReport: es8_metrics.InstitutionSummaryReportEs8,
+    es6_reports.NewUserDomainReport: es8_metrics.NewUserDomainReportEs8,
+    es6_reports.NodeSummaryReport: es8_metrics.NodeSummaryReportEs8,
+    es6_reports.OsfstorageFileCountReport: es8_metrics.OsfstorageFileCountReportEs8,
+    es6_reports.PreprintSummaryReport: es8_metrics.PreprintSummaryReportEs8,
+    es6_reports.UserSummaryReport: es8_metrics.UserSummaryReportEs8,
+    es6_reports.SpamSummaryReport: es8_metrics.SpamSummaryReportEs8,
+    es6_reports.InstitutionalUserReport: es8_metrics.InstitutionalUserReportEs8,
+    es6_reports.InstitutionMonthlySummaryReport: es8_metrics.InstitutionMonthlySummaryReportEs8,
+    es6_reports.PrivateSpamMetricsReport: es8_metrics.PrivateSpamMetricsReportEs8,
+    # events
+    RegistriesModerationMetrics: es8_metrics.RegistriesModerationMetricsEs8,
+}
+
+
+def _debug_migrate(es8_client, each_new):
+    for _each in each_new:
+        print(_each)
+
+
+def _do_migrate(es8_client, each_new):
+    es8_helpers.bulk(es8_client, each_new, ..., stats_only=True)
+
+
+def _es6_scan(es6_recordtype, from_when: str, until_when: str):
+    return es6_helpers.scan(
+        es6_client,
+        index=es6_recordtype._template_pattern,
+        query={"range": {"timestamp": {"gte": from_when, "lt": until_when}}},
+    )
+
+
+def _cycle_coverage_daily(report_date): ...
+
+
+def _cycle_coverage_monthly(report_yearmonth): ...
+
+
+def _unchanged_report_kwargs(es6_recordtype, hit):
+    if issubclass(es6_recordtype, es6_reports.DailyReport):
+        _cycle_coverage = format_timeparts(
+            datetime.date.fromisoformat(hit.pop("report_date")), djel8me.DAILY
+        )
+    elif issubclass(es6_recordtype, es6_reports.MonthlyReport):
+        _cycle_coverage = format_timeparts(hit.pop("report_yearmonth"), djel8me.MONTHLY)
+    return {
+        **hit,
+        'cycle_coverage': _cycle_coverage,
+    }
+
+
+@celery_app.task
+def migrate_unchanged_recordtype(
+    es6_recordtype_name: str,
+):
+    _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
+    _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
+
+    def _each_new():
+        for _hit in _es6_scan(_es6_recordtype, from_when, until_when):
+            breakpoint()
+            yield _es8_recordtype.record(
+                ...,
+                using=False,  # saved in bulk
+            )
+
+    _debug_migrate(_each_new())
+    # _do_migrate(_each_new())
+
+
+@celery_app.task
+def migrate_preprint_views(from_date, until_date):
+    # convert to counted-usage
+    ...
+
+
+@celery_app.task
+def migrate_preprint_downloads(from_date, until_date):
+    # convert to counted-usage
+    ...
+
+
+@celery_app.task
+def migrate_usage_reports(from_date, until_date):
+    # from PublicItemUsageReport to PublicItemUsageReportEs8
+    # add cumulative count
+    ...
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--start",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--unchanged",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--usage-events",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--usage-reports",
+            action="store_true",
+        )
+
+    def handle(self, *, start, unchanged, usage_events, usage_reports, **kwargs):
+        call_command('djelme_backend_setup')  # ensure all index templates
+        _default_all = not any((unchanged, usage_events, usage_reports))
+
+        if unchanged or _default_all:
+            self._handle_unchanged(start=start)
+        if usage_events or _default_all:
+            self._handle_usage_events(start=start)
+        if usage_reports or _default_all:
+            self._handle_usage_reports(start=start)
+
+    def _handle_unchanged(self, *, start: bool):
+        # for each (unchanged) report/event:
+        for _es6_cls, _es8_cls in _UNCHANGED_RECORDTYPES.items():
+            _es6_count = _es6_cls.search().count()
+            _es8_count = _es8_cls.search().count()
+            _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
+            self.stdout.write(f'{_es6_cls.__name__} (es6):\t{_es6_count}')
+            self.stdout.write(f'{_es8_cls.__name__}:\t{_style(_es8_count)}')
+            if start:
+                self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
+                # TODO: migrate_unchanged_recordtype.apply_async(...)
+            self.stdout.write('---')
+
+    def _handle_usage_events(self, *, start: bool):
+        # for counted-usage events:
+        # TODO: last X months only
+        # get/compare/print cardinalities
+        # schedule (per-day?) tasks (if --start)
+        _es6_pview_count = PreprintView.search().count()
+        _es6_pdownload_count = PreprintDownload.search().count()
+        _es6_usage_event_count = CountedAuthUsage.search().count()
+        _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
+        _es8_count = OsfCountedUsageEvent.search().count()
+        _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
+        self.stdout.write(f'{PreprintView.__name__} (es6):\t{_es6_pview_count}')
+        self.stdout.write(f'{PreprintDownload.__name__} (es6):\t{_es6_pdownload_count}')
+        self.stdout.write(f'{CountedAuthUsage.__name__} (es6):\t{_es6_pdownload_count}')
+        self.stdout.write(f'total (es6):\t{_es6_count}')
+        self.stdout.write(f'{OsfCountedUsageEvent.__name__}:\t{_style(_es8_count)}')
+        if start:
+            self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
+            # TODO: migrate_usage_events.apply_async(...)
+
+    def _handle_usage_reports(self, *, start: bool):
+        _es6_count = PublicItemUsageReport.search().count()
+        _es8_count = PublicItemUsageReportEs8.search().count()
+        _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
+        self.stdout.write(f'{PublicItemUsageReport.__name__} (es6):\t{_es6_count}')
+        self.stdout.write(f'{PublicItemUsageReportEs8.__name__}:\t{_style(_es8_count)}')
+        _item_count
+        # (if --start) schedule task per item (by composite agg on es6 public usage reports)
+        # each item-task iter thru reports oldest to newest, adding cumulative counts
diff --git a/osf/management/commands/sync_databases.py b/osf/management/commands/sync_databases.py
index c31d63ea16e..b5030b4bba7 100644
--- a/osf/management/commands/sync_databases.py
+++ b/osf/management/commands/sync_databases.py
@@ -20,7 +20,7 @@ def handle(self, *args, **options):
             ['migrate'],
         ]
         if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
-            COMMANDS.append(['sync_metrics'])
+            COMMANDS.append(['djelme_backend_setup'])
 
         for check in COMMANDS:
             call_command(*check)

From ef981e7c886b67806d60c1d37261dde4cd6e1e8d Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 15 Apr 2026 08:25:11 -0400
Subject: [PATCH 044/100] wip

---
 .../commands/metrics_es8_migration.py         | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/osf/management/commands/metrics_es8_migration.py b/osf/management/commands/metrics_es8_migration.py
index 46b187c63bf..c2f765eef72 100644
--- a/osf/management/commands/metrics_es8_migration.py
+++ b/osf/management/commands/metrics_es8_migration.py
@@ -11,6 +11,11 @@
 from elasticsearch_metrics.util.timeparts import format_timeparts
 
 from framework.celery_tasks import app as celery_app
+from osf.metrics.preprint_metrics import (
+    PreprintView as PreprintViewEs6,
+    PreprintDownload as PreprintDownloadEs6,
+)
+from osf.metrics.counted_usage import CountedAuthUsage as CountedUsageEs6
 from osf.metrics import reports as es6_reports
 from osf.metrics import es8_metrics, RegistriesModerationMetrics
 
@@ -132,7 +137,6 @@ def add_arguments(self, parser):
     def handle(self, *, start, unchanged, usage_events, usage_reports, **kwargs):
         call_command('djelme_backend_setup')  # ensure all index templates
         _default_all = not any((unchanged, usage_events, usage_reports))
-
         if unchanged or _default_all:
             self._handle_unchanged(start=start)
         if usage_events or _default_all:
@@ -143,12 +147,13 @@ def handle(self, *, start, unchanged, usage_events, usage_reports, **kwargs):
     def _handle_unchanged(self, *, start: bool):
         # for each (unchanged) report/event:
         for _es6_cls, _es8_cls in _UNCHANGED_RECORDTYPES.items():
+            # display counts
             _es6_count = _es6_cls.search().count()
             _es8_count = _es8_cls.search().count()
             _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
             self.stdout.write(f'{_es6_cls.__name__} (es6):\t{_es6_count}')
             self.stdout.write(f'{_es8_cls.__name__}:\t{_style(_es8_count)}')
-            if start:
+            if start:  # schedule task
                 self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
                 # TODO: migrate_unchanged_recordtype.apply_async(...)
             self.stdout.write('---')
@@ -156,29 +161,30 @@ def _handle_unchanged(self, *, start: bool):
     def _handle_usage_events(self, *, start: bool):
         # for counted-usage events:
         # TODO: last X months only
-        # get/compare/print cardinalities
-        # schedule (per-day?) tasks (if --start)
-        _es6_pview_count = PreprintView.search().count()
-        _es6_pdownload_count = PreprintDownload.search().count()
-        _es6_usage_event_count = CountedAuthUsage.search().count()
+        # display counts for each view/download event type
+        _es6_pview_count = PreprintViewEs6.search().count()
+        _es6_pdownload_count = PreprintDownloadEs6.search().count()
+        _es6_usage_event_count = CountedUsageEs6.search().count()
         _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
-        _es8_count = OsfCountedUsageEvent.search().count()
+        _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
         _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
-        self.stdout.write(f'{PreprintView.__name__} (es6):\t{_es6_pview_count}')
-        self.stdout.write(f'{PreprintDownload.__name__} (es6):\t{_es6_pdownload_count}')
-        self.stdout.write(f'{CountedAuthUsage.__name__} (es6):\t{_es6_pdownload_count}')
+        self.stdout.write(f'{PreprintViewEs6.__name__} (es6):\t{_es6_pview_count}')
+        self.stdout.write(f'{PreprintDownloadEs6.__name__} (es6):\t{_es6_pdownload_count}')
+        self.stdout.write(f'{CountedUsageEs6.__name__} (es6):\t{_es6_pdownload_count}')
         self.stdout.write(f'total (es6):\t{_es6_count}')
-        self.stdout.write(f'{OsfCountedUsageEvent.__name__}:\t{_style(_es8_count)}')
-        if start:
+        self.stdout.write(f'{es8_metrics.OsfCountedUsageRecord.__name__}:\t{_style(_es8_count)}')
+        if start:  # schedule (per-day?) tasks (if --start)
             self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
             # TODO: migrate_usage_events.apply_async(...)
 
     def _handle_usage_reports(self, *, start: bool):
-        _es6_count = PublicItemUsageReport.search().count()
-        _es8_count = PublicItemUsageReportEs8.search().count()
+        # display total report counts
+        _es6_count = es6_reports.PublicItemUsageReport.search().count()
+        _es8_count = es8_metrics.PublicItemUsageReportEs8.search().count()
         _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
-        self.stdout.write(f'{PublicItemUsageReport.__name__} (es6):\t{_es6_count}')
-        self.stdout.write(f'{PublicItemUsageReportEs8.__name__}:\t{_style(_es8_count)}')
+        self.stdout.write(f'{es6_reports.PublicItemUsageReport.__name__} (es6):\t{_es6_count}')
+        self.stdout.write(f'{es8_metrics.PublicItemUsageReportEs8.__name__}:\t{_style(_es8_count)}')
+        # display distinct item counts
         _item_count
         # (if --start) schedule task per item (by composite agg on es6 public usage reports)
         # each item-task iter thru reports oldest to newest, adding cumulative counts

From 9ed70f3cda89f38b216455e3986f088f03814842 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 15 Apr 2026 11:55:33 -0400
Subject: [PATCH 045/100] quieter elastic logs

---
 conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conftest.py b/conftest.py
index 198316f1cc4..e80c4e5c566 100644
--- a/conftest.py
+++ b/conftest.py
@@ -43,6 +43,8 @@ def pytest_configure(config):
     'transitions.core',
     'MARKDOWN',
     'elasticsearch',
+    'elastic_transport',
+    'elasticsearch_metrics',
 ]
 for logger_name in SILENT_LOGGERS:
     logging.getLogger(logger_name).setLevel(logging.CRITICAL)

From be1ed2feec851748d16c0b55b20815ce3aa87917 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 15 Apr 2026 11:56:05 -0400
Subject: [PATCH 046/100] wip

---
 .../commands/metrics_es8_migration.py         | 146 +++++++++++++++---
 osf/metrics/es8_metrics.py                    |  35 +++++
 2 files changed, 156 insertions(+), 25 deletions(-)

diff --git a/osf/management/commands/metrics_es8_migration.py b/osf/management/commands/metrics_es8_migration.py
index c2f765eef72..ff6cdfe8b0f 100644
--- a/osf/management/commands/metrics_es8_migration.py
+++ b/osf/management/commands/metrics_es8_migration.py
@@ -1,7 +1,6 @@
 import datetime
 import logging
 
-
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
 from elasticsearch6 import helpers as es6_helpers
@@ -22,6 +21,10 @@
 
 _logger = logging.getLogger(__name__)
 
+_USAGE_MONTHS_BACK = 3
+
+_MAX_CARDINALITY_PRECISION = 40000  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control
+
 _UNCHANGED_RECORDTYPES = {
     # reports
     es6_reports.StorageAddonUsage: es8_metrics.StorageAddonUsageEs8,
@@ -40,7 +43,6 @@
     RegistriesModerationMetrics: es8_metrics.RegistriesModerationMetricsEs8,
 }
 
-
 def _debug_migrate(es8_client, each_new):
     for _each in each_new:
         print(_each)
@@ -49,7 +51,6 @@ def _debug_migrate(es8_client, each_new):
 def _do_migrate(es8_client, each_new):
     es8_helpers.bulk(es8_client, each_new, ..., stats_only=True)
 
-
 def _es6_scan(es6_recordtype, from_when: str, until_when: str):
     return es6_helpers.scan(
         es6_client,
@@ -58,6 +59,46 @@ def _es6_scan(es6_recordtype, from_when: str, until_when: str):
     )
 
 
+def _es6_usage_report_counts() -> tuple[int, int]:
+    _search = (
+        es6_reports.PublicItemUsageReport.search()
+    )
+    _search.aggs.metric(
+        'agg_item_count',
+        'cardinality',
+        field='item_osfid',
+        precision_threshold=_MAX_CARDINALITY_PRECISION,
+    )
+    _response = _search.execute()
+    _total_count = _response.hits.total
+    _item_count = (
+        _response.aggregations.agg_item_count.value
+        if 'agg_item_count' in _response.aggregations
+        else 0
+    )
+    return (_total_count, _item_count)
+
+
+def _es8_usage_report_counts() -> tuple[int, int]:
+    _search = (
+        es8_metrics.PublicItemUsageReportEs8.search()
+    )
+    _search.aggs.metric(
+        'agg_item_count',
+        'cardinality',
+        field='item_osfid',
+        precision_threshold=_MAX_CARDINALITY_PRECISION,
+    )
+    _response = _search.execute()
+    _total_count = _response.hits.total.value
+    _item_count = (
+        _response.aggregations.agg_item_count.value
+        if 'agg_item_count' in _response.aggregations
+        else 0
+    )
+    return (_total_count, _item_count)
+
+
 def _cycle_coverage_daily(report_date): ...
 
 
@@ -114,7 +155,6 @@ def migrate_usage_reports(from_date, until_date):
     # add cumulative count
     ...
 
-
 class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument(
@@ -133,9 +173,22 @@ def add_arguments(self, parser):
             "--usage-reports",
             action="store_true",
         )
+        parser.add_argument(
+            "--clear-state",
+            action="store_true",
+        )
+        parser.add_argument(
+            "--no-setup",
+            action="store_true",
+        )
 
-    def handle(self, *, start, unchanged, usage_events, usage_reports, **kwargs):
-        call_command('djelme_backend_setup')  # ensure all index templates
+    def handle(self, *, start, unchanged, usage_events, usage_reports, clear_state, no_setup, **kwargs):
+        self._quiet_chatty_loggers()
+        if not no_setup:
+            call_command('djelme_backend_setup')
+        if clear_state:
+            self._clear_state()
+        self._display_started_at(start=start)
         _default_all = not any((unchanged, usage_events, usage_reports))
         if unchanged or _default_all:
             self._handle_unchanged(start=start)
@@ -150,11 +203,11 @@ def _handle_unchanged(self, *, start: bool):
             # display counts
             _es6_count = _es6_cls.search().count()
             _es8_count = _es8_cls.search().count()
-            _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
-            self.stdout.write(f'{_es6_cls.__name__} (es6):\t{_es6_count}')
-            self.stdout.write(f'{_es8_cls.__name__}:\t{_style(_es8_count)}')
+            #_es8_count = _es8_cls.search().count()
+            self._write_tabbed('es6', _es6_cls, _es6_count)
+            self._write_tabbed('es8', _es8_cls, _es8_count, style=self._eq_style(_es8_count, _es6_count))
             if start:  # schedule task
-                self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
+                self._write_tabbed('starting', _es6_cls, '=>', _es8_cls)
                 # TODO: migrate_unchanged_recordtype.apply_async(...)
             self.stdout.write('---')
 
@@ -167,24 +220,67 @@ def _handle_usage_events(self, *, start: bool):
         _es6_usage_event_count = CountedUsageEs6.search().count()
         _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
         _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
-        _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
-        self.stdout.write(f'{PreprintViewEs6.__name__} (es6):\t{_es6_pview_count}')
-        self.stdout.write(f'{PreprintDownloadEs6.__name__} (es6):\t{_es6_pdownload_count}')
-        self.stdout.write(f'{CountedUsageEs6.__name__} (es6):\t{_es6_pdownload_count}')
-        self.stdout.write(f'total (es6):\t{_es6_count}')
-        self.stdout.write(f'{es8_metrics.OsfCountedUsageRecord.__name__}:\t{_style(_es8_count)}')
+        self._write_tabbed('es6', PreprintViewEs6, _es6_pview_count)
+        self._write_tabbed('es6', PreprintDownloadEs6, _es6_pdownload_count)
+        self._write_tabbed('es6', CountedUsageEs6, _es6_usage_event_count)
+        self._write_tabbed('es6', '(total to migrate)', _es6_count)
+        self._write_tabbed('es8', es8_metrics.OsfCountedUsageRecord, _es8_count, style=self._eq_style(_es8_count, _es6_count))
         if start:  # schedule (per-day?) tasks (if --start)
-            self.stdout.write(f'starting {_es6_cls.__name__} => {_es8_cls.__name__}')
+            self.stdout.write(f'starting usages => {es8_metrics.OsfCountedUsageRecord}')
             # TODO: migrate_usage_events.apply_async(...)
+        self.stdout.write('---')
 
     def _handle_usage_reports(self, *, start: bool):
-        # display total report counts
-        _es6_count = es6_reports.PublicItemUsageReport.search().count()
-        _es8_count = es8_metrics.PublicItemUsageReportEs8.search().count()
-        _style = (self.style.SUCCESS if (_es6_count == _es8_count) else self.style.NOTICE)
-        self.stdout.write(f'{es6_reports.PublicItemUsageReport.__name__} (es6):\t{_es6_count}')
-        self.stdout.write(f'{es8_metrics.PublicItemUsageReportEs8.__name__}:\t{_style(_es8_count)}')
-        # display distinct item counts
-        _item_count
+        # display counts of reports and distinct items
+        _es6_count, _es6_item_count = _es6_usage_report_counts()
+        _es8_count, _es8_item_count = _es8_usage_report_counts()
+        self._write_tabbed('es6', es6_reports.PublicItemUsageReport, _es6_count)
+        self._write_tabbed('es8', es8_metrics.PublicItemUsageReportEs8, _es8_count, style=self._eq_style(_es8_count, _es6_count))
+        self._write_tabbed('es6', es6_reports.PublicItemUsageReport, '(items)', _es6_item_count)
+        self._write_tabbed('es8', es8_metrics.PublicItemUsageReportEs8, '(items)', _es8_item_count,
+                           style=self._eq_style(_es8_item_count, _es6_item_count))
         # (if --start) schedule task per item (by composite agg on es6 public usage reports)
         # each item-task iter thru reports oldest to newest, adding cumulative counts
+        if start:  # schedule per-item tasks
+            self.stdout.write(f'starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}')
+            # TODO: migrate_usage_events.apply_async(...)
+        self.stdout.write('---')
+
+    def _display_started_at(self, start):
+        _started_at = es8_metrics.Elastic6To8State.get_started_at()
+        if _started_at:
+            self.stdout.write(
+                f'osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}'
+            )
+        elif start:
+            _started_at = es8_metrics.Elastic6To8State.set_started_at_now()
+            self.stdout.write(
+                f'osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}'
+            )
+        else:
+            self.stdout.write(
+                'osf.metrics 6->8 migration not started nor starting (run with `--start` to start)'
+            )
+        self.stdout.write('---')
+
+    def _clear_state(self):
+        es8_metrics.Elastic6To8State.search().delete()
+
+    def _eq_style(self, num: int, should_be: int):
+        return self.style.SUCCESS if (num == should_be) else self.style.NOTICE
+
+    def _write_tabbed(self, *strables, style=None):
+        def _to_str(strable):
+            if isinstance(strable, type):
+                return strable.__name__
+            return str(strable)
+        self.stdout.write('\t'.join(map(_to_str, strables)), style)
+
+    def _quiet_chatty_loggers(self):
+        _chatty_loggers = [
+            'elasticsearch',
+            'elastic_transport',
+            'elasticsearch_metrics',
+        ]
+        for logger_name in _chatty_loggers:
+            logging.getLogger(logger_name).setLevel(logging.ERROR)
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 436a1c62d46..8b5e9dd5bc8 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -346,3 +346,38 @@ class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
     preprint_oopspam_hammed: int
     preprint_akismet_flagged: int
     preprint_akismet_hammed: int
+
+
+###
+# data migration state
+
+class Elastic6To8State(djelme.DjelmeRecordtype):
+    """index for storing values helpful for keeping track of the elastic 6->8 data migration"""
+    UNIQUE_TOGETHER_FIELDS = ('key',)
+    key: str
+    value: str | None
+    timestamp: datetime.datetime = esdsl.mapped_field(
+        default_factory=lambda: datetime.datetime.now(datetime.UTC),
+    )
+
+    class Index:
+        name = 'osf_elastic6to8state'
+
+    @classmethod
+    def get_by_key(cls, key: str):
+        _response = cls.search().query({'term': {'key': key}})[0].execute()
+        return _response[0] if _response else None
+
+    @classmethod
+    def get_timestamp(cls, key: str) -> datetime.datetime | None:
+        _record = cls.get_by_key(key)
+        return _record.timestamp if _record else None
+
+    @classmethod
+    def get_started_at(cls):
+        return cls.get_timestamp('started_at')
+
+    @classmethod
+    def set_started_at_now(cls):
+        _record = cls.record(key='started_at')
+        return _record.timestamp

From 64aeeaba0d84cf33d9c6726b86c8844f127520e3 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 15 Apr 2026 16:03:06 -0400
Subject: [PATCH 047/100] wip

---
 ...ics_es8_migration.py => migrate_metrics_6to8.py} | 13 ++++++-------
 poetry.lock                                         |  6 +++---
 pyproject.toml                                      |  2 +-
 3 files changed, 10 insertions(+), 11 deletions(-)
 rename osf/management/commands/{metrics_es8_migration.py => migrate_metrics_6to8.py} (99%)

diff --git a/osf/management/commands/metrics_es8_migration.py b/osf/management/commands/migrate_metrics_6to8.py
similarity index 99%
rename from osf/management/commands/metrics_es8_migration.py
rename to osf/management/commands/migrate_metrics_6to8.py
index ff6cdfe8b0f..104caccfb6c 100644
--- a/osf/management/commands/metrics_es8_migration.py
+++ b/osf/management/commands/migrate_metrics_6to8.py
@@ -158,27 +158,27 @@ def migrate_usage_reports(from_date, until_date):
 class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument(
-            "--start",
+            "--no-setup",
             action="store_true",
         )
         parser.add_argument(
-            "--unchanged",
+            "--clear-state",
             action="store_true",
         )
         parser.add_argument(
-            "--usage-events",
+            "--start",
             action="store_true",
         )
         parser.add_argument(
-            "--usage-reports",
+            "--unchanged",
             action="store_true",
         )
         parser.add_argument(
-            "--clear-state",
+            "--usage-events",
             action="store_true",
         )
         parser.add_argument(
-            "--no-setup",
+            "--usage-reports",
             action="store_true",
         )
 
@@ -203,7 +203,6 @@ def _handle_unchanged(self, *, start: bool):
             # display counts
             _es6_count = _es6_cls.search().count()
             _es8_count = _es8_cls.search().count()
-            #_es8_count = _es8_cls.search().count()
             self._write_tabbed('es6', _es6_cls, _es6_count)
             self._write_tabbed('es8', _es8_cls, _es8_count, style=self._eq_style(_es8_count, _es6_count))
             if start:  # schedule task
diff --git a/poetry.lock b/poetry.lock
index 90665bce81f..d86523f94de 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"
-resolved_reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"
+reference = "e18f029c406d743d407f18fda8a133b261f9c4d2"
+resolved_reference = "e18f029c406d743d407f18fda8a133b261f9c4d2"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "ef1d6d327f5557e43482793b276ccb6c5fd07989f27367af3a3736a8547b4d1a"
+content-hash = "320d3eb4cd7f0f4c5d8cc698db51ee1bf4c37f8b8d41d21a86ca5cdb9b2e6b42"
diff --git a/pyproject.toml b/pyproject.toml
index 013df3f448d..a5c39d297d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "e18f029c406d743d407f18fda8a133b261f9c4d2"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 97cd5b7f125c2413393d1276dec710f54b24a33a Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 16 Apr 2026 10:46:00 -0400
Subject: [PATCH 048/100] wip

---
 ...ics_6to8.py => migrate_osfmetrics_6to8.py} | 261 +++++++++++++-----
 osf/metrics/es8_metrics.py                    |   2 +-
 poetry.lock                                   |   6 +-
 pyproject.toml                                |   2 +-
 4 files changed, 198 insertions(+), 73 deletions(-)
 rename osf/management/commands/{migrate_metrics_6to8.py => migrate_osfmetrics_6to8.py} (50%)

diff --git a/osf/management/commands/migrate_metrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
similarity index 50%
rename from osf/management/commands/migrate_metrics_6to8.py
rename to osf/management/commands/migrate_osfmetrics_6to8.py
index 104caccfb6c..2f4cbb28385 100644
--- a/osf/management/commands/migrate_metrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -1,5 +1,6 @@
 import datetime
 import logging
+from pprint import pprint
 
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
@@ -7,7 +8,6 @@
 from elasticsearch8 import helpers as es8_helpers
 from elasticsearch_metrics.registry import djelme_registry
 from elasticsearch_metrics.imps import elastic8 as djel8me
-from elasticsearch_metrics.util.timeparts import format_timeparts
 
 from framework.celery_tasks import app as celery_app
 from osf.metrics.preprint_metrics import (
@@ -17,6 +17,7 @@
 from osf.metrics.counted_usage import CountedAuthUsage as CountedUsageEs6
 from osf.metrics import reports as es6_reports
 from osf.metrics import es8_metrics, RegistriesModerationMetrics
+from osf.metrics.utils import YearMonth
 
 
 _logger = logging.getLogger(__name__)
@@ -43,57 +44,76 @@
     RegistriesModerationMetrics: es8_metrics.RegistriesModerationMetricsEs8,
 }
 
-def _debug_migrate(es8_client, each_new):
+
+def _delete_all(recordtype):
+    # TODO: REMOVE THIS
+    recordtype.search().query({"match_all": {}}).delete()
+    recordtype.refresh()
+
+
+def _delete_all_es8():
+    # TODO: REMOVE THIS
+    for _es8_recordtype in _UNCHANGED_RECORDTYPES.values():
+        _delete_all(_es8_recordtype)
+    _delete_all(es8_metrics.PublicItemUsageReportEs8)
+    _delete_all(es8_metrics.OsfCountedUsageRecord)
+
+
+def _debug_migrate(each_new):
     for _each in each_new:
-        print(_each)
+        pprint(_each.to_dict())
 
 
 def _do_migrate(es8_client, each_new):
     es8_helpers.bulk(es8_client, each_new, ..., stats_only=True)
 
-def _es6_scan(es6_recordtype, from_when: str, until_when: str):
+
+def _es6_scan_all(es6_recordtype):
+    return es6_helpers.scan(
+        es6_recordtype._get_connection(),
+        index=es6_recordtype._template_pattern,
+    )
+
+
+def _es6_scan_range(es6_recordtype, from_when: str, until_when: str):
     return es6_helpers.scan(
-        es6_client,
+        es6_recordtype._get_connection(),
         index=es6_recordtype._template_pattern,
         query={"range": {"timestamp": {"gte": from_when, "lt": until_when}}},
     )
 
 
 def _es6_usage_report_counts() -> tuple[int, int]:
-    _search = (
-        es6_reports.PublicItemUsageReport.search()
-    )
+    _search = es6_reports.PublicItemUsageReport.search()
     _search.aggs.metric(
-        'agg_item_count',
-        'cardinality',
-        field='item_osfid',
+        "agg_item_count",
+        "cardinality",
+        field="item_osfid",
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _total_count = _response.hits.total
     _item_count = (
         _response.aggregations.agg_item_count.value
-        if 'agg_item_count' in _response.aggregations
+        if "agg_item_count" in _response.aggregations
         else 0
     )
     return (_total_count, _item_count)
 
 
 def _es8_usage_report_counts() -> tuple[int, int]:
-    _search = (
-        es8_metrics.PublicItemUsageReportEs8.search()
-    )
+    _search = es8_metrics.PublicItemUsageReportEs8.search()
     _search.aggs.metric(
-        'agg_item_count',
-        'cardinality',
-        field='item_osfid',
+        "agg_item_count",
+        "cardinality",
+        field="item_osfid",
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _total_count = _response.hits.total.value
     _item_count = (
         _response.aggregations.agg_item_count.value
-        if 'agg_item_count' in _response.aggregations
+        if "agg_item_count" in _response.aggregations
         else 0
     )
     return (_total_count, _item_count)
@@ -105,36 +125,95 @@ def _cycle_coverage_daily(report_date): ...
 def _cycle_coverage_monthly(report_yearmonth): ...
 
 
-def _unchanged_report_kwargs(es6_recordtype, hit):
+def _get_es6_field_names(es6_recordtype):
+    """
+    adapted from DocumentBase._get_field_names in elasticsearch8.dsl
+    """
+    for _field_name in es6_recordtype._doc_type.mapping:
+        _field = es6_recordtype._doc_type.mapping[_field_name]
+        if hasattr(_field, "_doc_class"):
+            for _sub_field in _get_es6_field_names(_field._doc_class):
+                yield f"{_field_name}.{_sub_field}"
+        else:
+            yield _field_name
+
+
+def _assert_field_unchangedness(es6_recordtype, es8_recordtype):
+    _es6_fields = set(_get_es6_field_names(es6_recordtype))
+    _es8_fields = set(es8_recordtype._get_field_names())
+
+    # remove fields intentionally removed/renamed in migration
     if issubclass(es6_recordtype, es6_reports.DailyReport):
-        _cycle_coverage = format_timeparts(
-            datetime.date.fromisoformat(hit.pop("report_date")), djel8me.DAILY
-        )
+        assert issubclass(es8_recordtype, djel8me.CyclicRecord)
+        _es6_fields.remove("timestamp")
+        _es6_fields.remove("report_date")
     elif issubclass(es6_recordtype, es6_reports.MonthlyReport):
-        _cycle_coverage = format_timeparts(hit.pop("report_yearmonth"), djel8me.MONTHLY)
-    return {
-        **hit,
-        'cycle_coverage': _cycle_coverage,
-    }
+        assert issubclass(es8_recordtype, djel8me.CyclicRecord)
+        _es6_fields.remove("timestamp")
+        _es6_fields.remove("report_yearmonth")
+    else:
+        assert issubclass(es8_recordtype, djel8me.EventRecord)
 
+    # remove fields intentionally added in migration
+    _es8_fields.remove("timeseries_timeparts")
+    if issubclass(es8_recordtype, djel8me.CyclicRecord):
+        _es8_fields.remove("created")
+        _es8_fields.remove("cycle_coverage")
 
-@celery_app.task
+    # all remaining fields should match
+    assert _es6_fields == _es8_fields
+
+
+# TODO: @celery_app.task
 def migrate_unchanged_recordtype(
     es6_recordtype_name: str,
 ):
     _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
     _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
+    _assert_field_unchangedness(_es6_recordtype, _es8_recordtype)
+    _kwarg_converter = (
+        _each_cyclicrecord_kwarg
+        if issubclass(
+            _es6_recordtype, (es6_reports.DailyReport, es6_reports.MonthlyReport)
+        )
+        else _each_eventrecord_kwarg
+    )
 
     def _each_new():
-        for _hit in _es6_scan(_es6_recordtype, from_when, until_when):
-            breakpoint()
+        for _hit in _es6_scan_all(_es6_recordtype):
+            _es8_kwargs = dict(_kwarg_converter(_hit["_source"]))
             yield _es8_recordtype.record(
-                ...,
-                using=False,  # saved in bulk
+                **_es8_kwargs,
+                using=False,  # skip saving; save in bulk
             )
 
     _debug_migrate(_each_new())
-    # _do_migrate(_each_new())
+    # TODO: _do_migrate(_es8_recordtype._get_connection(), _each_new())
+
+
+def _semverish_from_yearmonth(given_yearmonth: str):
+    _ym = YearMonth.from_str(given_yearmonth)
+    return f"{_ym.year}.{_ym.month}"
+
+
+def _semverish_from_date(given_date: str):
+    _d = datetime.date.fromisoformat(given_date)
+    return f"{_d.year}.{_d.month}.{_d.day}"
+
+
+def _each_cyclicrecord_kwarg(es6_source: dict):
+    for _key, _val in es6_source.items():
+        if _key == "report_yearmonth":
+            yield ("cycle_coverage", _semverish_from_yearmonth(_val))
+        elif _key == "report_date":
+            yield ("cycle_coverage", _semverish_from_date(_val))
+        elif _key != "timestamp":
+            # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
+            yield (_key, _val)
+
+
+def _each_eventrecord_kwarg(es6_source) -> dict:
+    yield from es6_source.items()  # no changes needed
 
 
 @celery_app.task
@@ -155,6 +234,7 @@ def migrate_usage_reports(from_date, until_date):
     # add cumulative count
     ...
 
+
 class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument(
@@ -182,13 +262,23 @@ def add_arguments(self, parser):
             action="store_true",
         )
 
-    def handle(self, *, start, unchanged, usage_events, usage_reports, clear_state, no_setup, **kwargs):
+    def handle(
+        self,
+        *,
+        start,
+        unchanged,
+        usage_events,
+        usage_reports,
+        clear_state,
+        no_setup,
+        **kwargs,
+    ):
         self._quiet_chatty_loggers()
         if not no_setup:
-            call_command('djelme_backend_setup')
+            call_command("djelme_backend_setup")
         if clear_state:
             self._clear_state()
-        self._display_started_at(start=start)
+        self._migration_start(start=start)
         _default_all = not any((unchanged, usage_events, usage_reports))
         if unchanged or _default_all:
             self._handle_unchanged(start=start)
@@ -203,12 +293,18 @@ def _handle_unchanged(self, *, start: bool):
             # display counts
             _es6_count = _es6_cls.search().count()
             _es8_count = _es8_cls.search().count()
-            self._write_tabbed('es6', _es6_cls, _es6_count)
-            self._write_tabbed('es8', _es8_cls, _es8_count, style=self._eq_style(_es8_count, _es6_count))
+            self._write_tabbed("es6", _es6_cls, _es6_count)
+            self._write_tabbed(
+                "es8",
+                _es8_cls,
+                _es8_count,
+                style=self._eq_style(_es8_count, _es6_count),
+            )
             if start:  # schedule task
-                self._write_tabbed('starting', _es6_cls, '=>', _es8_cls)
+                self._write_tabbed("starting", _es6_cls, "=>", _es8_cls)
+                migrate_unchanged_recordtype(_es6_cls.__name__)
                 # TODO: migrate_unchanged_recordtype.apply_async(...)
-            self.stdout.write('---')
+            self.stdout.write("---")
 
     def _handle_usage_events(self, *, start: bool):
         # for counted-usage events:
@@ -219,67 +315,96 @@ def _handle_usage_events(self, *, start: bool):
         _es6_usage_event_count = CountedUsageEs6.search().count()
         _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
         _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
-        self._write_tabbed('es6', PreprintViewEs6, _es6_pview_count)
-        self._write_tabbed('es6', PreprintDownloadEs6, _es6_pdownload_count)
-        self._write_tabbed('es6', CountedUsageEs6, _es6_usage_event_count)
-        self._write_tabbed('es6', '(total to migrate)', _es6_count)
-        self._write_tabbed('es8', es8_metrics.OsfCountedUsageRecord, _es8_count, style=self._eq_style(_es8_count, _es6_count))
+        self._write_tabbed("es6", PreprintViewEs6, _es6_pview_count)
+        self._write_tabbed("es6", PreprintDownloadEs6, _es6_pdownload_count)
+        self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
+        self._write_tabbed("es6", "(total to migrate)", _es6_count)
+        self._write_tabbed(
+            "es8",
+            es8_metrics.OsfCountedUsageRecord,
+            _es8_count,
+            style=self._eq_style(_es8_count, _es6_count),
+        )
         if start:  # schedule (per-day?) tasks (if --start)
-            self.stdout.write(f'starting usages => {es8_metrics.OsfCountedUsageRecord}')
-            # TODO: migrate_usage_events.apply_async(...)
-        self.stdout.write('---')
+            self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord}")
+            for _from_date, _until_date in _each_date_in_range(...):
+                migrate_usage_events(_from_date.isoformat(), _until_date.isoformat())
+                # TODO: migrate_usage_events.apply_async(...)
+        self.stdout.write("---")
 
     def _handle_usage_reports(self, *, start: bool):
         # display counts of reports and distinct items
         _es6_count, _es6_item_count = _es6_usage_report_counts()
         _es8_count, _es8_item_count = _es8_usage_report_counts()
-        self._write_tabbed('es6', es6_reports.PublicItemUsageReport, _es6_count)
-        self._write_tabbed('es8', es8_metrics.PublicItemUsageReportEs8, _es8_count, style=self._eq_style(_es8_count, _es6_count))
-        self._write_tabbed('es6', es6_reports.PublicItemUsageReport, '(items)', _es6_item_count)
-        self._write_tabbed('es8', es8_metrics.PublicItemUsageReportEs8, '(items)', _es8_item_count,
-                           style=self._eq_style(_es8_item_count, _es6_item_count))
+        self._write_tabbed("es6", es6_reports.PublicItemUsageReport, _es6_count)
+        self._write_tabbed(
+            "es8",
+            es8_metrics.PublicItemUsageReportEs8,
+            _es8_count,
+            style=self._eq_style(_es8_count, _es6_count),
+        )
+        self._write_tabbed(
+            "es6", es6_reports.PublicItemUsageReport, "(items)", _es6_item_count
+        )
+        self._write_tabbed(
+            "es8",
+            es8_metrics.PublicItemUsageReportEs8,
+            "(items)",
+            _es8_item_count,
+            style=self._eq_style(_es8_item_count, _es6_item_count),
+        )
         # (if --start) schedule task per item (by composite agg on es6 public usage reports)
         # each item-task iter thru reports oldest to newest, adding cumulative counts
         if start:  # schedule per-item tasks
-            self.stdout.write(f'starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}')
-            # TODO: migrate_usage_events.apply_async(...)
-        self.stdout.write('---')
+            self.stdout.write(
+                f"starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}"
+            )
+            # TODO: migrate_usage_reports.apply_async(...)
+        self.stdout.write("---")
 
-    def _display_started_at(self, start):
+    def _migration_start(self, start):
         _started_at = es8_metrics.Elastic6To8State.get_started_at()
         if _started_at:
             self.stdout.write(
-                f'osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}'
+                f"osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}"
             )
         elif start:
             _started_at = es8_metrics.Elastic6To8State.set_started_at_now()
             self.stdout.write(
-                f'osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}'
+                f"osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}"
             )
         else:
             self.stdout.write(
-                'osf.metrics 6->8 migration not started nor starting (run with `--start` to start)'
+                "osf.metrics 6->8 migration not started nor starting (run with `--start` to start)"
             )
-        self.stdout.write('---')
+        self.stdout.write("---")
 
     def _clear_state(self):
-        es8_metrics.Elastic6To8State.search().delete()
+        self.stdout.write(
+            "clearing all migration state (start time, etc)", self.style.NOTICE
+        )
+        es8_metrics.Elastic6To8State.search().query({"match_all": {}}).delete()
+        es8_metrics.Elastic6To8State.refresh()
+        # TODO: REMOVE THIS
+        self.stdout.write("deleting all migration target data in es8", self.style.ERROR)
+        _delete_all_es8()
 
     def _eq_style(self, num: int, should_be: int):
-        return self.style.SUCCESS if (num == should_be) else self.style.NOTICE
+        return self.style.SUCCESS if (num == should_be) else self.style.WARNING
 
     def _write_tabbed(self, *strables, style=None):
         def _to_str(strable):
             if isinstance(strable, type):
                 return strable.__name__
             return str(strable)
-        self.stdout.write('\t'.join(map(_to_str, strables)), style)
+
+        self.stdout.write("\t".join(map(_to_str, strables)), style)
 
     def _quiet_chatty_loggers(self):
         _chatty_loggers = [
-            'elasticsearch',
-            'elastic_transport',
-            'elasticsearch_metrics',
+            "elasticsearch",
+            "elastic_transport",
+            "elasticsearch_metrics",
         ]
         for logger_name in _chatty_loggers:
             logging.getLogger(logger_name).setLevel(logging.ERROR)
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 8b5e9dd5bc8..4980358dc5f 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -351,7 +351,7 @@ class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
 ###
 # data migration state
 
-class Elastic6To8State(djelme.DjelmeRecordtype):
+class Elastic6To8State(djelme.SimpleRecord):
     """index for storing values helpful for keeping track of the elastic 6->8 data migration"""
     UNIQUE_TOGETHER_FIELDS = ('key',)
     key: str
diff --git a/poetry.lock b/poetry.lock
index d86523f94de..df08934ef29 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "e18f029c406d743d407f18fda8a133b261f9c4d2"
-resolved_reference = "e18f029c406d743d407f18fda8a133b261f9c4d2"
+reference = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"
+resolved_reference = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "320d3eb4cd7f0f4c5d8cc698db51ee1bf4c37f8b8d41d21a86ca5cdb9b2e6b42"
+content-hash = "d149bb933fd3845714e26920360c34f3224ab0f84a789b3185cf716033a8d4bf"
diff --git a/pyproject.toml b/pyproject.toml
index a5c39d297d1..4b6f896f39e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "e18f029c406d743d407f18fda8a133b261f9c4d2"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 7eba5cce220851ac479b74b67e1baa71e94d2c95 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 16 Apr 2026 16:25:28 -0400
Subject: [PATCH 049/100] wip: convert counted usages to OsfCountedUsageRecord

---
 .../commands/migrate_osfmetrics_6to8.py       | 222 +++++++++++++-----
 osf/metrics/es8_metrics.py                    |   1 +
 2 files changed, 167 insertions(+), 56 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 2f4cbb28385..812322657a9 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -1,4 +1,6 @@
+import collections
 import datetime
+import functools
 import logging
 from pprint import pprint
 
@@ -10,6 +12,7 @@
 from elasticsearch_metrics.imps import elastic8 as djel8me
 
 from framework.celery_tasks import app as celery_app
+from osf.metadata import rdfutils
 from osf.metrics.preprint_metrics import (
     PreprintView as PreprintViewEs6,
     PreprintDownload as PreprintDownloadEs6,
@@ -18,10 +21,14 @@
 from osf.metrics import reports as es6_reports
 from osf.metrics import es8_metrics, RegistriesModerationMetrics
 from osf.metrics.utils import YearMonth
+from website import settings as website_settings
 
 
 _logger = logging.getLogger(__name__)
 
+###
+# constants
+
 _USAGE_MONTHS_BACK = 3
 
 _MAX_CARDINALITY_PRECISION = 40000  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control
@@ -45,6 +52,68 @@
 }
 
 
+###
+# celery tasks
+
+
+# TODO: @celery_app.task
+def migrate_unchanged_recordtype(es6_recordtype_name: str):
+    _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
+    _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
+    _assert_field_unchangedness(_es6_recordtype, _es8_recordtype)
+
+    if issubclass(_es8_recordtype, djel8me.CyclicRecord):
+
+        def _new_es8_record(source_dict):
+            _kwargs = dict(_convert_cyclicrecord_kwargs(source_dict))
+            return _es8_recordtype(**_kwargs)
+
+    else:  # no conversion needed for event record with unchanged fields
+
+        def _new_es8_record(source_dict):
+            return _es8_recordtype(**source_dict)
+
+    def _each_new():
+        for _hit in _es6_scan_all(_es6_recordtype):
+            yield _new_es8_record(_hit["_source"])
+
+    _debug_migrate(_each_new())
+    # TODO: _do_migrate(_es8_recordtype._get_connection(), _each_new())
+
+
+# TODO: @celery_app.task
+def migrate_counted_usages(from_when: str, until_when: str):
+    # CountedAuthUsage => OsfCountedUsageRecord
+    def _each_new():
+        for _hit in _es6_scan_all(CountedUsageEs6, from_when, until_when):
+            yield _convert_counted_usage(_hit["_source"])
+
+    _debug_migrate(_each_new())
+
+
+# TODO: @celery_app.task
+def migrate_preprint_views(from_date: str, until_date: str):
+    # convert to counted-usage
+    ...
+
+
+# TODO: @celery_app.task
+def migrate_preprint_downloads(from_date: str, until_date: str):
+    # convert to counted-usage
+    ...
+
+
+# TODO: @celery_app.task
+def migrate_usage_reports(from_date, until_date):
+    # from PublicItemUsageReport to PublicItemUsageReportEs8
+    # add cumulative count
+    ...
+
+
+###
+# various helper functions
+
+
 def _delete_all(recordtype):
     # TODO: REMOVE THIS
     recordtype.search().query({"match_all": {}}).delete()
@@ -60,6 +129,7 @@ def _delete_all_es8():
 
 
 def _debug_migrate(each_new):
+    # TODO: remove this
     for _each in each_new:
         pprint(_each.to_dict())
 
@@ -68,6 +138,18 @@ def _do_migrate(es8_client, each_new):
     es8_helpers.bulk(es8_client, each_new, ..., stats_only=True)
 
 
+def _date_range(
+    range_start: datetime.date,
+    range_end: datetime.date,
+    step: datetime.timedelta = datetime.timedelta(days=1),
+) -> collections.abc.Iterator[tuple[datetime.date, datetime.date]]:
+    _from_date = range_start
+    _until_date = range_start + step
+    while _from_date < range_end:
+        yield (_from_date, _until_date)
+        (_from_date, _until_date) = (_until_date, _until_date + step)
+
+
 def _es6_scan_all(es6_recordtype):
     return es6_helpers.scan(
         es6_recordtype._get_connection(),
@@ -119,12 +201,6 @@ def _es8_usage_report_counts() -> tuple[int, int]:
     return (_total_count, _item_count)
 
 
-def _cycle_coverage_daily(report_date): ...
-
-
-def _cycle_coverage_monthly(report_yearmonth): ...
-
-
 def _get_es6_field_names(es6_recordtype):
     """
     adapted from DocumentBase._get_field_names in elasticsearch8.dsl
@@ -164,33 +240,6 @@ def _assert_field_unchangedness(es6_recordtype, es8_recordtype):
     assert _es6_fields == _es8_fields
 
 
-# TODO: @celery_app.task
-def migrate_unchanged_recordtype(
-    es6_recordtype_name: str,
-):
-    _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
-    _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
-    _assert_field_unchangedness(_es6_recordtype, _es8_recordtype)
-    _kwarg_converter = (
-        _each_cyclicrecord_kwarg
-        if issubclass(
-            _es6_recordtype, (es6_reports.DailyReport, es6_reports.MonthlyReport)
-        )
-        else _each_eventrecord_kwarg
-    )
-
-    def _each_new():
-        for _hit in _es6_scan_all(_es6_recordtype):
-            _es8_kwargs = dict(_kwarg_converter(_hit["_source"]))
-            yield _es8_recordtype.record(
-                **_es8_kwargs,
-                using=False,  # skip saving; save in bulk
-            )
-
-    _debug_migrate(_each_new())
-    # TODO: _do_migrate(_es8_recordtype._get_connection(), _each_new())
-
-
 def _semverish_from_yearmonth(given_yearmonth: str):
     _ym = YearMonth.from_str(given_yearmonth)
     return f"{_ym.year}.{_ym.month}"
@@ -201,38 +250,85 @@ def _semverish_from_date(given_date: str):
     return f"{_d.year}.{_d.month}.{_d.day}"
 
 
-def _each_cyclicrecord_kwarg(es6_source: dict):
+def _convert_cyclicrecord_kwargs(es6_source: dict):
     for _key, _val in es6_source.items():
         if _key == "report_yearmonth":
+            # report_yearmonth converts to cycle_coverage Y.M
             yield ("cycle_coverage", _semverish_from_yearmonth(_val))
         elif _key == "report_date":
+            # report_date converts to cycle_coverage Y.M.D
             yield ("cycle_coverage", _semverish_from_date(_val))
         elif _key != "timestamp":
             # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
             yield (_key, _val)
 
 
-def _each_eventrecord_kwarg(es6_source) -> dict:
-    yield from es6_source.items()  # no changes needed
+def _convert_counted_usage(source_dict) -> es8_metrics.OsfCountedUsageRecord:
+    _item_iri = _iri_from_osfid(source_dict["item_guid"])
+    return es8_metrics.OsfCountedUsageRecord(
+        # fields from djelme.CountedUsageRecord
+        timestamp=source_dict["timestamp"],
+        sessionhour_id=source_dict["session_id"],
+        platform_iri=source_dict["platform_iri"],
+        # TODO: database_iri=provider iri
+        item_iri=_item_iri,
+        within_iris=[
+            _item_iri,  # correct mistake; make inclusive-within aggregations easier
+            *(
+                _iri_from_osfid(_within_osfid)
+                for _within_osfid in source_dict["surrounding_guids"]
+            ),
+        ],
+        # fields from OsfCountedUsageRecord
+        item_osfid=source_dict["item_guid"],
+        item_type=_convert_item_type(source_dict),
+        item_public=source_dict["item_public"],
+        provider_id=source_dict["provider_id"],
+        user_is_authenticated=source_dict["user_is_authenticated"],
+        action_labels=source_dict["action_labels"],
+        pageview_info=source_dict[
+            "pageview_info"
+        ],  # TODO: does this need the PageviewInfo object?
+    )
 
 
-@celery_app.task
-def migrate_preprint_views(from_date, until_date):
-    # convert to counted-usage
-    ...
+def _iri_from_osfid(osfid: str) -> str:
+    return f"{website_settings.DOMAIN}{osfid}"
 
 
-@celery_app.task
-def migrate_preprint_downloads(from_date, until_date):
-    # convert to counted-usage
-    ...
+def _convert_item_type(es6_usage_dict):
+    """convert model-name item types to OSFMAP item types
 
+    previous item_types use `type(osf_model).__name__.lower()`
+    """
+    _modelname = es6_usage_dict["item_type"]
+    assert isinstance(_modelname, str)
+    match _modelname:
+        case "osfuser":
+            return rdfutils.DCTERMS.Agent
+        case "preprint":
+            return rdfutils.OSF.Preprint
+        case "registration":
+            return (
+                rdfutils.OSF.RegistrationComponent
+                if es6_usage_dict.get("surrounding_guids")
+                else rdfutils.OSF.Registration
+            )
+        case "node":
+            return (
+                rdfutils.OSF.ProjectComponent
+                if es6_usage_dict.get("surrounding_guids")
+                else rdfutils.OSF.Project
+            )
+        case _ if "file" in _modelname:
+            return rdfutils.OSF.File
+        case _:
+            _logger.error(f"unknown item type: {_modelname}")
+            return _modelname  # give up
 
-@celery_app.task
-def migrate_usage_reports(from_date, until_date):
-    # from PublicItemUsageReport to PublicItemUsageReportEs8
-    # add cumulative count
-    ...
+
+###
+# the command itself
 
 
 class Command(BaseCommand):
@@ -278,7 +374,7 @@ def handle(
             call_command("djelme_backend_setup")
         if clear_state:
             self._clear_state()
-        self._migration_start(start=start)
+        self._check_started_at(start_now=start)
         _default_all = not any((unchanged, usage_events, usage_reports))
         if unchanged or _default_all:
             self._handle_unchanged(start=start)
@@ -327,9 +423,18 @@ def _handle_usage_events(self, *, start: bool):
         )
         if start:  # schedule (per-day?) tasks (if --start)
             self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord}")
-            for _from_date, _until_date in _each_date_in_range(...):
-                migrate_usage_events(_from_date.isoformat(), _until_date.isoformat())
-                # TODO: migrate_usage_events.apply_async(...)
+            _started = self._migration_started_at
+            _range_start = (
+                _started - datetime.timedelta(months=_USAGE_MONTHS_BACK)
+            ).date
+            _range_end = _started.date() + datetime.timedelta(days=1)
+            for _from_date, _until_date in _date_range(_range_start, _range_end):
+                _from_str = _from_date.isoformat()
+                _until_str = _until_date.isoformat()
+                # TODO: .apply_async(...)
+                migrate_counted_usages(_from_str, _until_str)
+                migrate_preprint_views(_from_str, _until_str)
+                migrate_preprint_downloads(_from_str, _until_str)
         self.stdout.write("---")
 
     def _handle_usage_reports(self, *, start: bool):
@@ -362,13 +467,18 @@ def _handle_usage_reports(self, *, start: bool):
             # TODO: migrate_usage_reports.apply_async(...)
         self.stdout.write("---")
 
-    def _migration_start(self, start):
-        _started_at = es8_metrics.Elastic6To8State.get_started_at()
+    @functools.cached_property
+    def _migration_started_at(self):
+        return es8_metrics.Elastic6To8State.get_started_at()
+
+    def _check_started_at(self, start_now):
+        _started_at = self._migration_started_at
         if _started_at:
             self.stdout.write(
                 f"osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}"
             )
-        elif start:
+        elif start_now:
+            del self._migration_started_at  # clear cache
             _started_at = es8_metrics.Elastic6To8State.set_started_at_now()
             self.stdout.write(
                 f"osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}"
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 4980358dc5f..3be81e9262e 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -89,6 +89,7 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     item_osfid: str
     item_type: str
     item_public: bool
+    provider_id: str
     user_is_authenticated: bool
     action_labels: list[str]
     pageview_info: PageviewInfo

From 7d554b66ba27447f79179c8e997427215302a11b Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 17 Apr 2026 14:14:20 -0400
Subject: [PATCH 050/100] wip

---
 .../commands/migrate_osfmetrics_6to8.py       | 189 +++++++++++-------
 osf/metrics/es8_metrics.py                    |   9 +-
 poetry.lock                                   |   6 +-
 pyproject.toml                                |   2 +-
 4 files changed, 130 insertions(+), 76 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 812322657a9..f0e1147f025 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -7,15 +7,16 @@
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
 from elasticsearch6 import helpers as es6_helpers
-from elasticsearch8 import helpers as es8_helpers
+from elasticsearch6_dsl.connections import connections as es6_connections
+from elasticsearch8.dsl.connections import connections as es8_connections
 from elasticsearch_metrics.registry import djelme_registry
 from elasticsearch_metrics.imps import elastic8 as djel8me
 
 from framework.celery_tasks import app as celery_app
 from osf.metadata import rdfutils
 from osf.metrics.preprint_metrics import (
-    PreprintView as PreprintViewEs6,
-    PreprintDownload as PreprintDownloadEs6,
+    PreprintView,
+    PreprintDownload,
 )
 from osf.metrics.counted_usage import CountedAuthUsage as CountedUsageEs6
 from osf.metrics import reports as es6_reports
@@ -29,7 +30,7 @@
 ###
 # constants
 
-_USAGE_MONTHS_BACK = 3
+_USAGE_DAYS_BACK = 99
 
 _MAX_CARDINALITY_PRECISION = 40000  # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control
 
@@ -61,58 +62,76 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str):
     _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
     _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
     _assert_field_unchangedness(_es6_recordtype, _es8_recordtype)
-
-    if issubclass(_es8_recordtype, djel8me.CyclicRecord):
-
-        def _new_es8_record(source_dict):
-            _kwargs = dict(_convert_cyclicrecord_kwargs(source_dict))
-            return _es8_recordtype(**_kwargs)
-
-    else:  # no conversion needed for event record with unchanged fields
-
-        def _new_es8_record(source_dict):
-            return _es8_recordtype(**source_dict)
-
-    def _each_new():
-        for _hit in _es6_scan_all(_es6_recordtype):
-            yield _new_es8_record(_hit["_source"])
-
-    _debug_migrate(_each_new())
-    # TODO: _do_migrate(_es8_recordtype._get_connection(), _each_new())
+    _convert_kwargs = (
+        _convert_unchanged_cyclicrecord_kwargs
+        if issubclass(_es8_recordtype, djel8me.CyclicRecord)
+        else (lambda _kw: _kw)  # no conversion needed for event record
+    )
+    _each_new = (
+        _es8_recordtype(**_convert_kwargs(_hit["_source"]))
+        for _hit in _es6_scan_all(_es6_recordtype)
+    )
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(_es8_recordtype, _each_new)
 
 
 # TODO: @celery_app.task
 def migrate_counted_usages(from_when: str, until_when: str):
     # CountedAuthUsage => OsfCountedUsageRecord
-    def _each_new():
-        for _hit in _es6_scan_all(CountedUsageEs6, from_when, until_when):
-            yield _convert_counted_usage(_hit["_source"])
-
-    _debug_migrate(_each_new())
+    _each_new = (
+        _convert_counted_usage(_hit["_source"])
+        for _hit in _es6_scan_range(CountedUsageEs6, from_when, until_when)
+    )
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 # TODO: @celery_app.task
-def migrate_preprint_views(from_date: str, until_date: str):
-    # convert to counted-usage
-    ...
+def migrate_preprint_views(from_when: str, until_when: str):
+    # PreprintView => OsfCountedUsageRecord
+    _action_labels = ['view', 'web']
+    _each_new = (
+        _convert_preprint_metric(_hit["_source"], _action_labels)
+        for _hit in _es6_scan_range(PreprintView, from_when, until_when)
+    )
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 # TODO: @celery_app.task
-def migrate_preprint_downloads(from_date: str, until_date: str):
-    # convert to counted-usage
-    ...
+def migrate_preprint_downloads(from_when: str, until_when: str):
+    # PreprintDownload => OsfCountedUsageRecord
+    _action_labels = ['download']
+    _each_new = (
+        _convert_preprint_metric(_hit["_source"], _action_labels)
+        for _hit in _es6_scan_range(PreprintDownload, from_when, until_when)
+    )
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 # TODO: @celery_app.task
-def migrate_usage_reports(from_date, until_date):
+def migrate_usage_reports(osfid: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
     # add cumulative count
-    ...
+    def _each_new():
+        for _hit in _es6_scan_all(CountedUsageEs6, query=...):
+            yield ...(_hit["_source"])
+
+    _debug_migrate(_each_new)
+    # TODO: return _es8_bulk_save(PublicItemUsageReportEs8, _each_new)
 
 
 ###
 # various helper functions
 
+def _es6_connection():
+    return es6_connections.get_connection('osfmetrics_es6')
+
+
+def _es8_connection():
+    return es8_connections.get_connection('osfmetrics_es8')
+
 
 def _delete_all(recordtype):
     # TODO: REMOVE THIS
@@ -131,11 +150,15 @@ def _delete_all_es8():
 def _debug_migrate(each_new):
     # TODO: remove this
     for _each in each_new:
-        pprint(_each.to_dict())
+        pprint(_each.to_dict(include_meta=True))
 
 
-def _do_migrate(es8_client, each_new):
-    es8_helpers.bulk(es8_client, each_new, ..., stats_only=True)
+def _es8_bulk_save(es8_recordtype, each_new_record):
+    _success_count, _fail_count = es8_recordtype.bulk(
+        each_new_record,
+        stats_only=True,
+    )
+    return _success_count
 
 
 def _date_range(
@@ -150,18 +173,19 @@ def _date_range(
         (_from_date, _until_date) = (_until_date, _until_date + step)
 
 
-def _es6_scan_all(es6_recordtype):
+def _es6_scan_all(es6_recordtype, query=None):
     return es6_helpers.scan(
-        es6_recordtype._get_connection(),
+        _es6_connection(),
         index=es6_recordtype._template_pattern,
+        query=query,
     )
 
 
 def _es6_scan_range(es6_recordtype, from_when: str, until_when: str):
     return es6_helpers.scan(
-        es6_recordtype._get_connection(),
+        _es6_connection(),
         index=es6_recordtype._template_pattern,
-        query={"range": {"timestamp": {"gte": from_when, "lt": until_when}}},
+        query={"query": {"range": {"timestamp": {"gte": from_when, "lt": until_when}}}},
     )
 
 
@@ -218,7 +242,7 @@ def _assert_field_unchangedness(es6_recordtype, es8_recordtype):
     _es6_fields = set(_get_es6_field_names(es6_recordtype))
     _es8_fields = set(es8_recordtype._get_field_names())
 
-    # remove fields intentionally removed/renamed in migration
+    # remove fields intentionally removed in migration
     if issubclass(es6_recordtype, es6_reports.DailyReport):
         assert issubclass(es8_recordtype, djel8me.CyclicRecord)
         _es6_fields.remove("timestamp")
@@ -250,17 +274,19 @@ def _semverish_from_date(given_date: str):
     return f"{_d.year}.{_d.month}.{_d.day}"
 
 
-def _convert_cyclicrecord_kwargs(es6_source: dict):
-    for _key, _val in es6_source.items():
-        if _key == "report_yearmonth":
-            # report_yearmonth converts to cycle_coverage Y.M
-            yield ("cycle_coverage", _semverish_from_yearmonth(_val))
-        elif _key == "report_date":
-            # report_date converts to cycle_coverage Y.M.D
-            yield ("cycle_coverage", _semverish_from_date(_val))
-        elif _key != "timestamp":
-            # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
-            yield (_key, _val)
+def _convert_unchanged_cyclicrecord_kwargs(es6_source: dict) -> dict:
+    def _each_kwarg():
+        for _key, _val in es6_source.items():
+            if _key == "report_yearmonth":
+                # report_yearmonth converts to cycle_coverage Y.M
+                yield ("cycle_coverage", _semverish_from_yearmonth(_val))
+            elif _key == "report_date":
+                # report_date converts to cycle_coverage Y.M.D
+                yield ("cycle_coverage", _semverish_from_date(_val))
+            elif _key != "timestamp":
+                # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
+                yield (_key, _val)
+    return dict(_each_kwarg())
 
 
 def _convert_counted_usage(source_dict) -> es8_metrics.OsfCountedUsageRecord:
@@ -276,19 +302,40 @@ def _convert_counted_usage(source_dict) -> es8_metrics.OsfCountedUsageRecord:
             _item_iri,  # correct mistake; make inclusive-within aggregations easier
             *(
                 _iri_from_osfid(_within_osfid)
-                for _within_osfid in source_dict["surrounding_guids"]
+                for _within_osfid in source_dict.get("surrounding_guids", ())
             ),
         ],
         # fields from OsfCountedUsageRecord
         item_osfid=source_dict["item_guid"],
         item_type=_convert_item_type(source_dict),
         item_public=source_dict["item_public"],
-        provider_id=source_dict["provider_id"],
+        provider_id=source_dict.get("provider_id"),
         user_is_authenticated=source_dict["user_is_authenticated"],
         action_labels=source_dict["action_labels"],
-        pageview_info=source_dict[
-            "pageview_info"
-        ],  # TODO: does this need the PageviewInfo object?
+        # TODO: does this need the PageviewInfo object?
+        pageview_info=source_dict.get("pageview_info"),
+    )
+
+
+def _convert_preprint_metric(source_dict, action_labels: list[str]) -> es8_metrics.OsfCountedUsageRecord:
+    _preprint_iri = _iri_from_osfid(source_dict["preprint_id"])
+    return es8_metrics.OsfCountedUsageRecord.record(
+        using=False,  # don't save yet; will save in bulk
+        # fields used to compute a sessionhour_id:
+        timestamp=source_dict["timestamp"],
+        user_id=source_dict['user_id'],  # TODO: handle None?
+        # fields from djelme.CountedUsageRecord:
+        platform_iri=website_settings.DOMAIN,
+        # TODO: database_iri=provider iri
+        item_iri=_preprint_iri,
+        within_iris=[_preprint_iri],
+        # fields from OsfCountedUsageRecord:
+        item_osfid=source_dict["preprint_id"],
+        item_type=rdfutils.OSF.Preprint,
+        item_public=True,
+        provider_id=source_dict["provider_id"],
+        user_is_authenticated=bool(source_dict["user_id"]),
+        action_labels=action_labels,
     )
 
 
@@ -301,7 +348,11 @@ def _convert_item_type(es6_usage_dict):
 
     previous item_types use `type(osf_model).__name__.lower()`
     """
-    _modelname = es6_usage_dict["item_type"]
+    try:
+        _modelname = es6_usage_dict["item_type"]
+    except KeyError:
+        # this probably only happens in fake data
+        return None
     assert isinstance(_modelname, str)
     match _modelname:
         case "osfuser":
@@ -320,11 +371,11 @@ def _convert_item_type(es6_usage_dict):
                 if es6_usage_dict.get("surrounding_guids")
                 else rdfutils.OSF.Project
             )
-        case _ if "file" in _modelname:
+        case _ if "file" in _modelname:  # hack for the many "filenode" models
             return rdfutils.OSF.File
-        case _:
+        case _:  # give up gracefully
             _logger.error(f"unknown item type: {_modelname}")
-            return _modelname  # give up
+            return _modelname
 
 
 ###
@@ -406,13 +457,13 @@ def _handle_usage_events(self, *, start: bool):
         # for counted-usage events:
         # TODO: last X months only
         # display counts for each view/download event type
-        _es6_pview_count = PreprintViewEs6.search().count()
-        _es6_pdownload_count = PreprintDownloadEs6.search().count()
+        _es6_pview_count = PreprintView.search().count()
+        _es6_pdownload_count = PreprintDownload.search().count()
         _es6_usage_event_count = CountedUsageEs6.search().count()
         _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
         _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
-        self._write_tabbed("es6", PreprintViewEs6, _es6_pview_count)
-        self._write_tabbed("es6", PreprintDownloadEs6, _es6_pdownload_count)
+        self._write_tabbed("es6", PreprintView, _es6_pview_count)
+        self._write_tabbed("es6", PreprintDownload, _es6_pdownload_count)
         self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
         self._write_tabbed("es6", "(total to migrate)", _es6_count)
         self._write_tabbed(
@@ -425,8 +476,8 @@ def _handle_usage_events(self, *, start: bool):
             self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord}")
             _started = self._migration_started_at
             _range_start = (
-                _started - datetime.timedelta(months=_USAGE_MONTHS_BACK)
-            ).date
+                _started - datetime.timedelta(days=_USAGE_DAYS_BACK)
+            ).date()
             _range_end = _started.date() + datetime.timedelta(days=1)
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 3be81e9262e..1824fcf2b3f 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -92,9 +92,10 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     provider_id: str
     user_is_authenticated: bool
     action_labels: list[str]
-    pageview_info: PageviewInfo
+    pageview_info: PageviewInfo | None
 
-    def save(self, *args, **kwargs):
+    def clean(self):
+        super().clean()
         # autofill pageview_info fields
         if self.pageview_info:
             self.pageview_info.hour_of_day = self.timestamp.hour
@@ -104,7 +105,9 @@ def save(self, *args, **kwargs):
             _ref_url = self.pageview_info.referer_url
             if _ref_url:
                 self.pageview_info.referer_domain = urlsplit(_ref_url).netloc
-        super().save(*args, **kwargs)
+        # ensure inclusive "within"
+        if self.item_iri not in self.within_iris:
+            self.within_iris = [self.item_iri, *self.within_iris]
 
 
 class ActionLabel(enum.Enum):
diff --git a/poetry.lock b/poetry.lock
index df08934ef29..09ee8c9749b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"
-resolved_reference = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"
+reference = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"
+resolved_reference = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "d149bb933fd3845714e26920360c34f3224ab0f84a789b3185cf716033a8d4bf"
+content-hash = "9aea963ca1a8b23c8e07fa22b34dc23c0f53d1d017edf29aad65a733ab4832fe"
diff --git a/pyproject.toml b/pyproject.toml
index 4b6f896f39e..a0a08b48047 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "d7e0483972a58b940bec843679c2a8c9b8bcb75c"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 68b38bae8483eb349f785105dd887617e1b046d6 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 17 Apr 2026 17:28:12 -0400
Subject: [PATCH 051/100] wip: run osfmetrics 6to8 migration as celery tasks; add cumulative usage counts

---
 .../commands/migrate_osfmetrics_6to8.py       | 338 ++++++++++++++----
 osf/metrics/es8_metrics.py                    |  55 ++-
 poetry.lock                                   |   6 +-
 pyproject.toml                                |   2 +-
 4 files changed, 320 insertions(+), 81 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index f0e1147f025..acbc43df5dd 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -6,14 +6,18 @@
 
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
+from django.db import OperationalError as DjangoOperationalError
+from elasticsearch6.exceptions import ConnectionError as Elastic6ConnectionError
 from elasticsearch6 import helpers as es6_helpers
 from elasticsearch6_dsl.connections import connections as es6_connections
+from elasticsearch8.exceptions import ConnectionError as Elastic8ConnectionError
 from elasticsearch8.dsl.connections import connections as es8_connections
 from elasticsearch_metrics.registry import djelme_registry
 from elasticsearch_metrics.imps import elastic8 as djel8me
+from psycopg2 import OperationalError as PostgresOperationalError
 
 from framework.celery_tasks import app as celery_app
-from osf.metadata import rdfutils
+from osf.metadata.rdfutils import OSF, DCTERMS
 from osf.metrics.preprint_metrics import (
     PreprintView,
     PreprintDownload,
@@ -21,7 +25,9 @@
 from osf.metrics.counted_usage import CountedAuthUsage as CountedUsageEs6
 from osf.metrics import reports as es6_reports
 from osf.metrics import es8_metrics, RegistriesModerationMetrics
+from osf.metrics.reporters.public_item_usage import _iter_composite_bucket_keys
 from osf.metrics.utils import YearMonth
+from osf import models as osfdb
 from website import settings as website_settings
 
 
@@ -52,12 +58,22 @@
     RegistriesModerationMetrics: es8_metrics.RegistriesModerationMetricsEs8,
 }
 
+_TASK_KWARGS = dict(
+    autoretry_for=(
+        DjangoOperationalError,
+        Elastic6ConnectionError,
+        Elastic8ConnectionError,
+        PostgresOperationalError,
+    ),
+    max_retries=50,
+    retry_backoff=True,
+)
 
 ###
 # celery tasks
 
 
-# TODO: @celery_app.task
+@celery_app.task(**_TASK_KWARGS)
 def migrate_unchanged_recordtype(es6_recordtype_name: str):
     _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
     _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
@@ -71,66 +87,75 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str):
         _es8_recordtype(**_convert_kwargs(_hit["_source"]))
         for _hit in _es6_scan_all(_es6_recordtype)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(_es8_recordtype, _each_new)
+    # _debug_migrate(_each_new)
+    return _es8_bulk_save(_es8_recordtype, _each_new)
 
 
-# TODO: @celery_app.task
+@celery_app.task(**_TASK_KWARGS)
 def migrate_counted_usages(from_when: str, until_when: str):
     # CountedAuthUsage => OsfCountedUsageRecord
     _each_new = (
         _convert_counted_usage(_hit["_source"])
-        for _hit in _es6_scan_range(CountedUsageEs6, from_when, until_when)
+        for _hit in _es6_scan_range(
+            CountedUsageEs6,
+            from_when,
+            until_when,
+            addl_filter={"exists": {"field": "item_guid"}},
+        )
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    # _debug_migrate(_each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
-# TODO: @celery_app.task
+@celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_views(from_when: str, until_when: str):
     # PreprintView => OsfCountedUsageRecord
-    _action_labels = ['view', 'web']
+    _action_labels = ["view", "web"]
     _each_new = (
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintView, from_when, until_when)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    # _debug_migrate(_each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
-# TODO: @celery_app.task
+@celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_downloads(from_when: str, until_when: str):
     # PreprintDownload => OsfCountedUsageRecord
-    _action_labels = ['download']
+    _action_labels = ["download"]
     _each_new = (
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintDownload, from_when, until_when)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    # _debug_migrate(_each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
-# TODO: @celery_app.task
+@celery_app.task(**_TASK_KWARGS)
 def migrate_usage_reports(osfid: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
     # add cumulative count
     def _each_new():
-        for _hit in _es6_scan_all(CountedUsageEs6, query=...):
-            yield ...(_hit["_source"])
+        for _hit in _es6_scan_all(
+            es6_reports.PublicItemUsageReport,
+            query_body={"query": {"term": {"item_osfid": osfid}}},
+        ):
+            yield _convert_public_usage_report(_hit["_source"])
 
-    _debug_migrate(_each_new)
-    # TODO: return _es8_bulk_save(PublicItemUsageReportEs8, _each_new)
+    # _debug_migrate(_each_new)
+    return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new())
 
 
 ###
 # various helper functions
 
+
 def _es6_connection():
-    return es6_connections.get_connection('osfmetrics_es6')
+    return es6_connections.get_connection("osfmetrics_es6")
 
 
 def _es8_connection():
-    return es8_connections.get_connection('osfmetrics_es8')
+    return es8_connections.get_connection("osfmetrics_es8")
 
 
 def _delete_all(recordtype):
@@ -173,19 +198,24 @@ def _date_range(
         (_from_date, _until_date) = (_until_date, _until_date + step)
 
 
-def _es6_scan_all(es6_recordtype, query=None):
+def _es6_scan_all(es6_recordtype, query_body=None):
     return es6_helpers.scan(
         _es6_connection(),
         index=es6_recordtype._template_pattern,
-        query=query,
+        query=query_body,
     )
 
 
-def _es6_scan_range(es6_recordtype, from_when: str, until_when: str):
+def _es6_scan_range(es6_recordtype, from_when: str, until_when: str, addl_filter=None):
+    _filters = [
+        {"range": {"timestamp": {"gte": from_when, "lt": until_when}}},
+    ]
+    if addl_filter:
+        _filters.append(addl_filter)
     return es6_helpers.scan(
         _es6_connection(),
         index=es6_recordtype._template_pattern,
-        query={"query": {"range": {"timestamp": {"gte": from_when, "lt": until_when}}}},
+        query={"query": {"bool": {"filter": _filters}}},
     )
 
 
@@ -286,59 +316,182 @@ def _each_kwarg():
             elif _key != "timestamp":
                 # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
                 yield (_key, _val)
+
     return dict(_each_kwarg())
 
 
 def _convert_counted_usage(source_dict) -> es8_metrics.OsfCountedUsageRecord:
     _item_iri = _iri_from_osfid(source_dict["item_guid"])
+    _item_type = _convert_item_type(source_dict)
     return es8_metrics.OsfCountedUsageRecord(
-        # fields from djelme.CountedUsageRecord
+        # fields from djelme.CountedUsageRecord:
         timestamp=source_dict["timestamp"],
         sessionhour_id=source_dict["session_id"],
         platform_iri=source_dict["platform_iri"],
-        # TODO: database_iri=provider iri
+        database_iri=_convert_database_iri(source_dict.get("provider_id"), _item_type),
         item_iri=_item_iri,
         within_iris=[
-            _item_iri,  # correct mistake; make inclusive-within aggregations easier
-            *(
-                _iri_from_osfid(_within_osfid)
-                for _within_osfid in source_dict.get("surrounding_guids", ())
-            ),
+            _iri_from_osfid(_within_osfid)
+            for _within_osfid in source_dict.get("surrounding_guids", ())
         ],
-        # fields from OsfCountedUsageRecord
+        # fields from OsfCountedUsageRecord:
         item_osfid=source_dict["item_guid"],
-        item_type=_convert_item_type(source_dict),
+        item_type=_item_type,
         item_public=source_dict["item_public"],
         provider_id=source_dict.get("provider_id"),
         user_is_authenticated=source_dict["user_is_authenticated"],
         action_labels=source_dict["action_labels"],
-        # TODO: does this need the PageviewInfo object?
+        # TODO: does this need the PageviewInfo object or is the dictionary fine?
         pageview_info=source_dict.get("pageview_info"),
     )
 
 
-def _convert_preprint_metric(source_dict, action_labels: list[str]) -> es8_metrics.OsfCountedUsageRecord:
+def _convert_preprint_metric(
+    source_dict, action_labels: list[str]
+) -> es8_metrics.OsfCountedUsageRecord:
     _preprint_iri = _iri_from_osfid(source_dict["preprint_id"])
     return es8_metrics.OsfCountedUsageRecord.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
         timestamp=source_dict["timestamp"],
-        user_id=source_dict['user_id'],  # TODO: handle None?
+        user_id=source_dict.get("user_id"),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
-        # TODO: database_iri=provider iri
+        database_iri=_convert_database_iri(
+            source_dict.get("provider_id"), OSF.Preprint
+        ),
         item_iri=_preprint_iri,
         within_iris=[_preprint_iri],
         # fields from OsfCountedUsageRecord:
         item_osfid=source_dict["preprint_id"],
-        item_type=rdfutils.OSF.Preprint,
+        item_type=OSF.Preprint,
         item_public=True,
-        provider_id=source_dict["provider_id"],
-        user_is_authenticated=bool(source_dict["user_id"]),
+        provider_id=source_dict.get("provider_id"),
+        user_is_authenticated=bool(source_dict.get("user_id")),
         action_labels=action_labels,
     )
 
 
+def _convert_public_usage_report(source_dict) -> es8_metrics.PublicItemUsageReportEs8:
+    _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
+        osfid=source_dict["item_osfid"],
+        until_when=YearMonth.from_str(source_dict["report_yearmonth"]).month_end(),
+        item_type=source_dict.get("item_type"),
+    )
+    return es8_metrics.PublicItemUsageReportEs8(
+        item_osfid=source_dict["item_osfid"],
+        item_type=source_dict.get("item_type"),
+        provider_id=source_dict.get("provider_id"),
+        platform_iri=source_dict.get("platform_iri"),
+        view_count=source_dict.get("view_count"),
+        view_session_count=source_dict.get("view_session_count"),
+        cumulative_view_count=_c_views,
+        cumulative_view_session_count=_c_view_sess,
+        download_count=source_dict.get("download_count"),
+        download_session_count=source_dict.get("download_session_count"),
+        cumulative_download_count=_c_downloads,
+        cumulative_download_session_count=_c_download_sess,
+    )
+
+
+def _get_cumulative_usage(osfid: str, until_when, item_type: str | None):
+    if item_type == "preprint":
+        _views = _cumulative_preprint_count(PreprintView, osfid, until_when)
+        _downloads = _cumulative_preprint_count(PreprintDownload, osfid, until_when)
+        _view_sess, _download_sess = 0, 0  # no session info on preprints (yet)
+    else:
+        _views, _view_sess = _cumulative_countedusage_views(osfid, until_when)
+        _downloads, _download_sess = _cumulative_countedusage_downloads(
+            osfid, until_when
+        )
+    return (_views, _view_sess, _downloads, _download_sess)
+
+
+def _cumulative_countedusage_views(
+    osfid: str, until_when: str
+) -> tuple[int, int]:
+    """compute view_session_count separately to avoid double-counting
+
+    (the same session may be represented in both the composite agg on `item_guid`
+    and that on `surrounding_guids`)
+    """
+    # copied/adapted from osf.metrics.reporters.public_item_usage
+    _search = (
+        CountedUsageEs6.search()
+        .filter("term", item_public=True)
+        .filter("range", timestamp={"lt": until_when})
+        .filter("term", action_labels="view")
+        .filter(
+            "bool",
+            should=[
+                {"term": {"item_guid": osfid}},
+                {"term": {"surrounding_guids": osfid}},
+            ],
+            minimum_should_match=1,
+        )
+        .extra(size=0)  # only aggregations, no hits
+    )
+    _search.aggs.metric(
+        "agg_session_count",
+        "cardinality",
+        field="session_id",
+        precision_threshold=_MAX_CARDINALITY_PRECISION,
+    )
+    _response = _search.execute()
+    _view_count = _response.hits.total
+    _view_session_count = (
+        _response.aggregations.agg_session_count.value
+        if "agg_session_count" in _response.aggregations
+        else 0
+    )
+    return (_view_count, _view_session_count)
+
+
+def _cumulative_countedusage_downloads(osfid, until_when) -> tuple[int, int]:
+    """aggregate downloads on each osfid (not including components/files)"""
+    # copied/adapted from osf.metrics.reporters.public_item_usage
+    _search = (
+        CountedUsageEs6.search()
+        .filter("term", item_public=True)
+        .filter("range", timestamp={"lt": until_when})
+        .filter("term", action_labels="download")
+        .filter("term", item_guid=osfid)
+    )
+    _search.aggs.metric(
+        "agg_session_count",
+        "cardinality",
+        field="session_id",
+        precision_threshold=_MAX_CARDINALITY_PRECISION,
+    )
+    _response = _search.execute()
+    _download_count = _response.hits.total
+    _download_session_count = (
+        _response.aggregations.agg_session_count.value
+        if "agg_session_count" in _response.aggregations
+        else 0
+    )
+    return (_download_count, _download_session_count)
+
+
+def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str) -> int:
+    """aggregate views on each preprint"""
+    # copied/adapted from osf.metrics.preprint_metrics
+    _search = (
+        preprint_metric_cls.search()
+        .filter("term", preprint_id=osfid)
+        .filter("range", timestamp={"lt": until_when})
+        .extra(size=0)  # no hits; only aggs
+    )
+    _search.aggs.metric("agg_count", "sum", field="count")
+    _response = _search.execute()
+    _view_count = (
+        int(_response.aggregations.agg_count.value)
+        if hasattr(_response.aggregations, "agg_count")
+        else 0
+    )
+    return _view_count
+
+
 def _iri_from_osfid(osfid: str) -> str:
     return f"{website_settings.DOMAIN}{osfid}"
 
@@ -348,34 +501,83 @@ def _convert_item_type(es6_usage_dict):
 
     previous item_types use `type(osf_model).__name__.lower()`
     """
-    try:
-        _modelname = es6_usage_dict["item_type"]
-    except KeyError:
-        # this probably only happens in fake data
-        return None
-    assert isinstance(_modelname, str)
+    _modelname = es6_usage_dict.get("item_type")
     match _modelname:
+        case "" | None:
+            return OSF.Object
         case "osfuser":
-            return rdfutils.DCTERMS.Agent
+            return DCTERMS.Agent
         case "preprint":
-            return rdfutils.OSF.Preprint
+            return OSF.Preprint
         case "registration":
             return (
-                rdfutils.OSF.RegistrationComponent
+                OSF.RegistrationComponent
                 if es6_usage_dict.get("surrounding_guids")
-                else rdfutils.OSF.Registration
+                else OSF.Registration
             )
         case "node":
             return (
-                rdfutils.OSF.ProjectComponent
+                OSF.ProjectComponent
                 if es6_usage_dict.get("surrounding_guids")
-                else rdfutils.OSF.Project
+                else OSF.Project
             )
         case _ if "file" in _modelname:  # hack for the many "filenode" models
-            return rdfutils.OSF.File
+            return OSF.File
         case _:  # give up gracefully
-            _logger.error(f"unknown item type: {_modelname}")
-            return _modelname
+            return OSF.Object
+
+
+@functools.lru_cache
+def _convert_database_iri(provider_id: str | None, item_type_iri: str) -> str:
+    if not provider_id:
+        return website_settings.DOMAIN  # osf is a provider, sure why not
+
+    def _fallback_iri():
+        return f"urn:osf.io:{provider_id}"
+
+    match item_type_iri:
+        case OSF.ProjectComponent | OSF.Project | DCTERMS.Agent:
+            # implicit "osf" provider
+            return website_settings.DOMAIN
+        case OSF.Preprint:
+            try:
+                _provider = osfdb.PreprintProvider.objects.get(_id=provider_id)
+            except osfdb.PreprintProvider.DoesNotExist:
+                _logger.error(f"unknown preprint provider {provider_id!r}")
+                return _fallback_iri()
+            else:
+                return _provider.get_semantic_iri()
+        case OSF.RegistrationComponent | OSF.Registration:
+            try:
+                _provider = osfdb.RegistrationProvider.objects.get(_id=provider_id)
+            except osfdb.RegistrationProvider.DoesNotExist:
+                _logger.error(f"unknown registration provider {provider_id!r}")
+                return _fallback_iri()
+            else:
+                return _provider.get_semantic_iri()
+        case OSF.File:
+            # file providers are a different thing that don't really have an iri, just an id
+            return _fallback_iri()
+        case _:  # give up gracefully
+            _logger.error(
+                f"unknown item type {item_type_iri!r} with provider {provider_id!r}"
+            )
+            return _fallback_iri()
+
+
+def _each_usage_report_osfid(started_at, after_osfid=None):
+    _search = (
+        es6_reports.PublicItemUsageReport.search()
+        .filter("range", timestamp={"lt": started_at})
+        .extra(size=0)
+    )
+    _search.aggs.bucket(
+        "agg_osfid",
+        "composite",
+        sources=[{"osfid": {"terms": {"field": "item_osfid"}}}],
+        size=500,
+    )
+    return _iter_composite_bucket_keys(_search, "agg_osfid", "osfid", after=after_osfid)
 
 
 ###
@@ -449,8 +651,7 @@ def _handle_unchanged(self, *, start: bool):
             )
             if start:  # schedule task
                 self._write_tabbed("starting", _es6_cls, "=>", _es8_cls)
-                migrate_unchanged_recordtype(_es6_cls.__name__)
-                # TODO: migrate_unchanged_recordtype.apply_async(...)
+                migrate_unchanged_recordtype.delay(_es6_cls.__name__)
             self.stdout.write("---")
 
     def _handle_usage_events(self, *, start: bool):
@@ -475,17 +676,14 @@ def _handle_usage_events(self, *, start: bool):
         if start:  # schedule (per-day?) tasks (if --start)
             self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord}")
             _started = self._migration_started_at
-            _range_start = (
-                _started - datetime.timedelta(days=_USAGE_DAYS_BACK)
-            ).date()
+            _range_start = (_started - datetime.timedelta(days=_USAGE_DAYS_BACK)).date()
             _range_end = _started.date() + datetime.timedelta(days=1)
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
                 _until_str = _until_date.isoformat()
-                # TODO: .apply_async(...)
-                migrate_counted_usages(_from_str, _until_str)
-                migrate_preprint_views(_from_str, _until_str)
-                migrate_preprint_downloads(_from_str, _until_str)
+                migrate_counted_usages.delay(_from_str, _until_str)
+                migrate_preprint_views.delay(_from_str, _until_str)
+                migrate_preprint_downloads.delay(_from_str, _until_str)
         self.stdout.write("---")
 
     def _handle_usage_reports(self, *, start: bool):
@@ -515,7 +713,11 @@ def _handle_usage_reports(self, *, start: bool):
             self.stdout.write(
                 f"starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}"
             )
-            # TODO: migrate_usage_reports.apply_async(...)
+            for _osfid in _each_usage_report_osfid(
+                started_at=self._migration_started_at
+            ):
+                migrate_usage_reports(_osfid)
+                # TODO: migrate_usage_reports.apply_async(...)
         self.stdout.write("---")
 
     @functools.cached_property
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 1824fcf2b3f..fd8475b1bc3 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -77,19 +77,24 @@ class PageviewInfo(esdsl.InnerDoc):
 
 class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     '''
-
-    inherited fields:
-        platform_iri: str
-        database_iri: str
-        item_iri: str
-        sessionhour_id: str
-        within_iris: list[str]
+    Aim to support a COUNTER-style reporting api
+    https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
+    https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
     '''
-    # osf-specific fields
+
+    # inherited fields:
+    #     timestamp: datetime.datetime
+    #     platform_iri: str
+    #     database_iri: str
+    #     item_iri: str
+    #     sessionhour_id: str
+    #     within_iris: list[str]
+
+    # osf-specific fields:
     item_osfid: str
     item_type: str
     item_public: bool
-    provider_id: str
+    provider_id: str | None
     user_is_authenticated: bool
     action_labels: list[str]
     pageview_info: PageviewInfo | None
@@ -109,6 +114,38 @@ def clean(self):
         if self.item_iri not in self.within_iris:
             self.within_iris = [self.item_iri, *self.within_iris]
 
+    def _get_unique_together_values(self):
+        """get "unique together" values for "ON CONFLICT UPDATE" behavior
+
+        override djelme.BaseDjelmeRecord._get_unique_together_values
+        for more complex logic than UNIQUE_TOGETHER_FIELDS
+        to slightly better approximate `counter:Double-Click Filtering`
+        """
+        # note: copied from osf.metrics.counted_usage._fill_document_id
+        target_identifier = (
+            self.pageview_info.page_url
+            if self.pageview_info is not None and self.pageview_info.page_url is not None
+            else self.item_osfid
+        )
+        # slice the day into an array of 30-second windows,
+        # find this timestamp's windowslice index
+        day_start = datetime.datetime(
+            self.timestamp.year,
+            self.timestamp.month,
+            self.timestamp.day,
+            tzinfo=datetime.UTC,
+        )
+        time_in_seconds = (self.timestamp - day_start).total_seconds()
+        time_window = int(time_in_seconds / 30)  # 30-second windows
+        return (  # unique-together values:
+            self.platform_iri,
+            target_identifier,
+            self.sessionhour_id,
+            self.timestamp.date(),
+            time_window,
+            ','.join(sorted(self.action_labels)),
+        )
+
 
 class ActionLabel(enum.Enum):
     SEARCH = 'search'  # counter:Search
diff --git a/poetry.lock b/poetry.lock
index 09ee8c9749b..14113d228b3 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"
-resolved_reference = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"
+reference = "a1e00e468830a40758caa8afa4b838821471f5c1"
+resolved_reference = "a1e00e468830a40758caa8afa4b838821471f5c1"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "9aea963ca1a8b23c8e07fa22b34dc23c0f53d1d017edf29aad65a733ab4832fe"
+content-hash = "1ba293f397fef29212fc58bfb8e08753f64bf43471a6fd2eb9d71bfded4ae326"
diff --git a/pyproject.toml b/pyproject.toml
index a0a08b48047..f7e6eb5bb41 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "445fcea0aa6b5d07523cd67e959cb14088f15bb0"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "a1e00e468830a40758caa8afa4b838821471f5c1"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 69daa8744cdc5d47c3ba3fec571c564624d39dbd Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 10:11:50 -0400
Subject: [PATCH 052/100] wip: build cumulative usage-report counts from prior report

---
 docker-compose.yml                            |   7 +-
 .../commands/fake_metrics_reports.py          |  19 +++
 .../commands/migrate_osfmetrics_6to8.py       | 150 ++++++++++--------
 osf/metrics/es8_metrics.py                    |  41 ++++-
 poetry.lock                                   |   8 +-
 pyproject.toml                                |   2 +-
 website/settings/defaults.py                  |   1 +
 7 files changed, 157 insertions(+), 71 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 83e8fd27483..d771c75797a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -72,6 +72,8 @@ services:
   # Temporary: Remove when we've upgraded to ES6
   elasticsearch6:
     image: docker.elastic.co/elasticsearch/elasticsearch:6.3.1
+    environment:
+      - ES_JAVA_OPTS=-Xms512m -Xmx512m  # reduce memory usage
     ports:
       - 9201:9200
     volumes:
@@ -91,10 +93,9 @@ services:
       - elasticsearch8_data_vol:/usr/share/elasticsearch/data
     healthcheck:
       start_period: 15s
-      test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"]
+      test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'
       interval: 10s
-      timeout: 30s
-      retries: 5
+      retries: 30
     stdin_open: true
 
   postgres:
diff --git a/osf/management/commands/fake_metrics_reports.py b/osf/management/commands/fake_metrics_reports.py
index 765d6e475c1..53e13472e74 100644
--- a/osf/management/commands/fake_metrics_reports.py
+++ b/osf/management/commands/fake_metrics_reports.py
@@ -8,6 +8,8 @@
     UserSummaryReport,
     PreprintSummaryReport,
 )
+from osf.metrics.reports import PublicItemUsageReport
+from osf.metrics.utils import YearMonth
 from osf.models import PreprintProvider
 
 
@@ -53,10 +55,27 @@ def fake_preprint_counts(days_back):
             ).save()
 
 
+def fake_usage_reports(osfid: str, count: int):
+    _ym = YearMonth.from_date(date.today()).prior()
+    for _months in range(count):
+        PublicItemUsageReport.record(
+            item_osfid=osfid,
+            report_yearmonth=_ym,
+            view_count=(_vc := randint(0, 500)),
+            view_session_count=randint(0, _vc),
+            download_count=(_dc := randint(0, 300)),
+            download_session_count=randint(0, _dc),
+        )
+        _ym = _ym.prior()
+
+
 class Command(BaseCommand):
     def handle(self, *args, **kwargs):
         if not settings.DEBUG:
             raise NotImplementedError('fake_reports requires DEBUG mode')
         fake_user_counts(1000)
         fake_preprint_counts(1000)
+        fake_usage_reports('blarg', 100)
+        fake_usage_reports('blerg', 50)
+        fake_usage_reports('bleg', 50)
         # TODO: more reports
diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index acbc43df5dd..c72765ab261 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -65,8 +65,8 @@
         Elastic8ConnectionError,
         PostgresOperationalError,
     ),
-    max_retries=50,
-    retry_backoff=True,
+    retry_backoff=True,  # exponential backoff, with jitter
+    max_retries=20,
 )
 
 ###
@@ -87,8 +87,8 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str):
         _es8_recordtype(**_convert_kwargs(_hit["_source"]))
         for _hit in _es6_scan_all(_es6_recordtype)
     )
-    # _debug_migrate(_each_new)
-    return _es8_bulk_save(_es8_recordtype, _each_new)
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(_es8_recordtype, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -103,8 +103,8 @@ def migrate_counted_usages(from_when: str, until_when: str):
             addl_filter={"exists": {"field": "item_guid"}},
         )
     )
-    # _debug_migrate(_each_new)
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _debug_migrate(_each_new)
+    #return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -115,8 +115,8 @@ def migrate_preprint_views(from_when: str, until_when: str):
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintView, from_when, until_when)
     )
-    # _debug_migrate(_each_new)
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -127,8 +127,8 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintDownload, from_when, until_when)
     )
-    # _debug_migrate(_each_new)
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _debug_migrate(_each_new)
+    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -136,14 +136,23 @@ def migrate_usage_reports(osfid: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
     # add cumulative count
     def _each_new():
-        for _hit in _es6_scan_all(
+        _each_hit = _es6_scan_all(
             es6_reports.PublicItemUsageReport,
             query_body={"query": {"term": {"item_osfid": osfid}}},
-        ):
-            yield _convert_public_usage_report(_hit["_source"])
+        )
+        # only a few dozen of these per item; fine to hold all at once
+        _sorted_sources = sorted(
+            (_hit["_source"] for _hit in _each_hit),
+            key=lambda _s: _s["report_yearmonth"],
+        )
+        _prior_report = None
+        for _source in _sorted_sources:
+            yield (
+                _prior_report := _convert_public_usage_report(_source, _prior_report)
+            )
 
-    # _debug_migrate(_each_new)
-    return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new)
+    _debug_migrate(_each_new())
+    # return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new)
 
 
 ###
@@ -175,6 +184,7 @@ def _delete_all_es8():
 def _debug_migrate(each_new):
     # TODO: remove this
     for _each in each_new:
+        _each.full_clean()
         pprint(_each.to_dict(include_meta=True))
 
 
@@ -320,75 +330,89 @@ def _each_kwarg():
     return dict(_each_kwarg())
 
 
-def _convert_counted_usage(source_dict) -> es8_metrics.OsfCountedUsageRecord:
-    _item_iri = _iri_from_osfid(source_dict["item_guid"])
-    _item_type = _convert_item_type(source_dict)
+def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
+    _item_iri = _iri_from_osfid(source["item_guid"])
+    _item_type = _convert_item_type(source)
     return es8_metrics.OsfCountedUsageRecord(
         # fields from djelme.CountedUsageRecord:
-        timestamp=source_dict["timestamp"],
-        sessionhour_id=source_dict["session_id"],
-        platform_iri=source_dict["platform_iri"],
-        database_iri=_convert_database_iri(source_dict.get("provider_id"), _item_type),
+        timestamp=source["timestamp"],
+        sessionhour_id=source["session_id"],
+        platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
+        database_iri=_convert_database_iri(source.get("provider_id"), _item_type),
         item_iri=_item_iri,
         within_iris=[
             _iri_from_osfid(_within_osfid)
-            for _within_osfid in source_dict.get("surrounding_guids", ())
+            for _within_osfid in source.get("surrounding_guids", ())
         ],
         # fields from OsfCountedUsageRecord:
-        item_osfid=source_dict["item_guid"],
+        item_osfid=source["item_guid"],
         item_type=_item_type,
-        item_public=source_dict["item_public"],
-        provider_id=source_dict.get("provider_id"),
-        user_is_authenticated=source_dict["user_is_authenticated"],
-        action_labels=source_dict["action_labels"],
+        item_public=source["item_public"],
+        provider_id=source.get("provider_id"),
+        user_is_authenticated=source["user_is_authenticated"],
+        action_labels=source["action_labels"],
         # TODO: does this need the PageviewInfo object or is the dictionary fine?
-        pageview_info=source_dict.get("pageview_info"),
+        pageview_info=source.get("pageview_info"),
     )
 
 
 def _convert_preprint_metric(
-    source_dict, action_labels: list[str]
+    source: dict, action_labels: list[str]
 ) -> es8_metrics.OsfCountedUsageRecord:
-    _preprint_iri = _iri_from_osfid(source_dict["preprint_id"])
+    _preprint_iri = _iri_from_osfid(source["preprint_id"])
     return es8_metrics.OsfCountedUsageRecord.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
-        timestamp=source_dict["timestamp"],
-        user_id=source_dict.get("user_id"),
+        timestamp=source["timestamp"],
+        user_id=source.get("user_id"),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
-        database_iri=_convert_database_iri(
-            source_dict.get("provider_id"), OSF.Preprint
-        ),
+        database_iri=_convert_database_iri(source.get("provider_id"), OSF.Preprint),
         item_iri=_preprint_iri,
         within_iris=[_preprint_iri],
         # fields from OsfCountedUsageRecord:
-        item_osfid=source_dict["preprint_id"],
+        item_osfid=source["preprint_id"],
         item_type=OSF.Preprint,
         item_public=True,
-        provider_id=source_dict.get("provider_id"),
-        user_is_authenticated=bool(source_dict.get("user_id")),
+        provider_id=source.get("provider_id"),
+        user_is_authenticated=bool(source.get("user_id")),
         action_labels=action_labels,
     )
 
 
-def _convert_public_usage_report(source_dict) -> es8_metrics.PublicItemUsageReportEs8:
-    _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
-        osfid=source_dict["item_osfid"],
-        until_when=YearMonth.from_str(source_dict["report_yearmonth"]).month_end(),
-        item_type=source_dict.get("item_type"),
-    )
+def _convert_public_usage_report(
+    source: dict,
+    prior_report: es8_metrics.PublicItemUsageReportEs8 | None,
+) -> es8_metrics.PublicItemUsageReportEs8:
+    if prior_report is None:
+        _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
+            osfid=source["item_osfid"],
+            until_when=YearMonth.from_str(source["report_yearmonth"]).month_end(),
+            item_type=source.get("item_type"),
+        )
+    else:
+        _c_views = prior_report.cumulative_view_count + source.get("view_count", 0)
+        _c_view_sess = prior_report.cumulative_view_session_count + source.get(
+            "view_session_count", 0
+        )
+        _c_downloads = prior_report.cumulative_download_count + source.get(
+            "download_count", 0
+        )
+        _c_download_sess = prior_report.cumulative_download_session_count + source.get(
+            "download_session_count", 0
+        )
     return es8_metrics.PublicItemUsageReportEs8(
-        item_osfid=source_dict["item_osfid"],
-        item_type=source_dict.get("item_type"),
-        provider_id=source_dict.get("provider_id"),
-        platform_iri=source_dict.get("platform_iri"),
-        view_count=source_dict.get("view_count"),
-        view_session_count=source_dict.get("view_session_count"),
+        cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
+        item_osfid=source["item_osfid"],
+        item_type=source.get("item_type"),
+        provider_id=source.get("provider_id"),
+        platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
+        view_count=source.get("view_count"),
+        view_session_count=source.get("view_session_count"),
         cumulative_view_count=_c_views,
         cumulative_view_session_count=_c_view_sess,
-        download_count=source_dict.get("download_count"),
-        download_session_count=source_dict.get("download_session_count"),
+        download_count=source.get("download_count"),
+        download_session_count=source.get("download_session_count"),
         cumulative_download_count=_c_downloads,
         cumulative_download_session_count=_c_download_sess,
     )
@@ -407,9 +431,7 @@ def _get_cumulative_usage(osfid: str, until_when, item_type: str | None):
     return (_views, _view_sess, _downloads, _download_sess)
 
 
-def _cumulative_countedusage_views(
-    osfid: str, until_when: str
-) -> tuple[int, int]:
+def _cumulative_countedusage_views(osfid: str, until_when: str) -> tuple[int, int]:
     """compute view_session_count separately to avoid double-counting
 
     (the same session may be represented in both the composite agg on `item_guid`
@@ -651,7 +673,8 @@ def _handle_unchanged(self, *, start: bool):
             )
             if start:  # schedule task
                 self._write_tabbed("starting", _es6_cls, "=>", _es8_cls)
-                migrate_unchanged_recordtype.delay(_es6_cls.__name__)
+                #migrate_unchanged_recordtype.delay(_es6_cls.__name__)
+                migrate_unchanged_recordtype(_es6_cls.__name__)
             self.stdout.write("---")
 
     def _handle_usage_events(self, *, start: bool):
@@ -681,9 +704,12 @@ def _handle_usage_events(self, *, start: bool):
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
                 _until_str = _until_date.isoformat()
-                migrate_counted_usages.delay(_from_str, _until_str)
-                migrate_preprint_views.delay(_from_str, _until_str)
-                migrate_preprint_downloads.delay(_from_str, _until_str)
+                # migrate_counted_usages.delay(_from_str, _until_str)
+                # migrate_preprint_views.delay(_from_str, _until_str)
+                # migrate_preprint_downloads.delay(_from_str, _until_str)
+                migrate_counted_usages(_from_str, _until_str)
+                migrate_preprint_views(_from_str, _until_str)
+                migrate_preprint_downloads(_from_str, _until_str)
         self.stdout.write("---")
 
     def _handle_usage_reports(self, *, start: bool):
@@ -709,7 +735,7 @@ def _handle_usage_reports(self, *, start: bool):
         )
         # (if --start) schedule task per item (by composite agg on es6 public usage reports)
         # each item-task iter thru reports oldest to newest, adding cumulative counts
-        if start:  # schedule per-item tasks
+        if start:
             self.stdout.write(
                 f"starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}"
             )
@@ -717,7 +743,7 @@ def _handle_usage_reports(self, *, start: bool):
                 started_at=self._migration_started_at
             ):
                 migrate_usage_reports(_osfid)
-                # TODO: migrate_usage_reports.apply_async(...)
+                # TODO: migrate_usage_reports.delay(...)
         self.stdout.write("---")
 
     @functools.cached_property
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index fd8475b1bc3..67fee676112 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -3,7 +3,7 @@
 from urllib.parse import urlsplit
 
 import elasticsearch8.dsl as esdsl
-from elasticsearch_metrics import DAILY, MONTHLY
+from elasticsearch_metrics import DAILY, MONTHLY, YEARLY
 import elasticsearch_metrics.imps.elastic8 as djelme
 
 from osf.metrics.utils import YearMonth
@@ -233,12 +233,18 @@ class StorageAddonUsageEs8(djelme.CyclicRecord):
 
     usage_by_addon: list[UsageByStorageAddon]
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class DownloadCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     daily_file_downloads: int
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class InstitutionSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
@@ -252,6 +258,9 @@ class InstitutionSummaryReportEs8(djelme.CyclicRecord):
     registered_nodes: RegistrationRunningTotals
     registered_projects: RegistrationRunningTotals
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
 
 class NewUserDomainReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
@@ -260,6 +269,9 @@ class NewUserDomainReportEs8(djelme.CyclicRecord):
     domain_name: str
     new_user_count: int
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
 
 class NodeSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
@@ -269,12 +281,18 @@ class NodeSummaryReportEs8(djelme.CyclicRecord):
     registered_nodes: RegistrationRunningTotals
     registered_projects: RegistrationRunningTotals
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class OsfstorageFileCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     files: FileRunningTotals
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class PreprintSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
@@ -283,6 +301,9 @@ class PreprintSummaryReportEs8(djelme.CyclicRecord):
     provider_key: str
     preprint_count: int
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
 
 class UserSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
@@ -294,6 +315,9 @@ class UserSummaryReportEs8(djelme.CyclicRecord):
     new_users_with_institution_daily: int
     unconfirmed: int
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class SpamSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
@@ -310,6 +334,9 @@ class SpamSummaryReportEs8(djelme.CyclicRecord):
     user_marked_as_spam: int
     user_marked_as_ham: int
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class InstitutionalUserReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
@@ -333,6 +360,9 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
     public_file_count: int = esdsl.mapped_field(esdsl.Long())
     storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
 
 class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
@@ -350,6 +380,9 @@ class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
     monthly_logged_in_user_count: int = esdsl.mapped_field(esdsl.Long())
     monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long())
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 class PublicItemUsageReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
@@ -375,6 +408,9 @@ class PublicItemUsageReportEs8(djelme.CyclicRecord):
     cumulative_download_count: int = esdsl.mapped_field(esdsl.Long())
     cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long())
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
 
 class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
@@ -388,6 +424,9 @@ class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
     preprint_akismet_flagged: int
     preprint_akismet_hammed: int
 
+    class Meta:
+        timeseries_index_timedepth = YEARLY
+
 
 ###
 # data migration state
diff --git a/poetry.lock b/poetry.lock
index 14113d228b3..7aee4eca49f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1085,7 +1085,7 @@ Django = ">=2.0"
 
 [[package]]
 name = "django-elasticsearch-metrics"
-version = "2026.0.3"
+version = "2026.0.4"
 description = "Django app for storing time-series metrics in Elasticsearch."
 optional = false
 python-versions = ">=3.10,<4"
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "a1e00e468830a40758caa8afa4b838821471f5c1"
-resolved_reference = "a1e00e468830a40758caa8afa4b838821471f5c1"
+reference = "fed3c14f213642284a197ac2933106cdafede25b"
+resolved_reference = "fed3c14f213642284a197ac2933106cdafede25b"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "1ba293f397fef29212fc58bfb8e08753f64bf43471a6fd2eb9d71bfded4ae326"
+content-hash = "0f9c547a6309aa915b25f9a7a98e5d0c15c867d577a883547d894ca173cb2344"
diff --git a/pyproject.toml b/pyproject.toml
index f7e6eb5bb41..b04e0540d90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "a1e00e468830a40758caa8afa4b838821471f5c1"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "fed3c14f213642284a197ac2933106cdafede25b"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index 3053f9d1075..dc69126ca37 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -557,6 +557,7 @@ class CeleryConfig:
         task_routes = ('framework.celery_tasks.routers.CeleryRouter', )
         task_ignore_result = True
         task_store_errors_even_if_ignored = True
+        result_extended = True
 
     broker_url = os.environ.get('BROKER_URL', f'amqp://{RABBITMQ_USERNAME}:{RABBITMQ_PASSWORD}@{RABBITMQ_HOST}:{RABBITMQ_PORT}/{RABBITMQ_VHOST}')
     broker_use_ssl = False

From da7910a86760bfd9a5d581f2c9b35692d5d30670 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 12:11:55 -0400
Subject: [PATCH 053/100] wip

---
 .../commands/migrate_osfmetrics_6to8.py       | 248 ++++++++----------
 osf/metrics/es8_metrics.py                    |  24 +-
 website/settings/defaults.py                  |   1 +
 3 files changed, 125 insertions(+), 148 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index c72765ab261..5ee937e80c0 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -17,7 +17,6 @@
 from psycopg2 import OperationalError as PostgresOperationalError
 
 from framework.celery_tasks import app as celery_app
-from osf.metadata.rdfutils import OSF, DCTERMS
 from osf.metrics.preprint_metrics import (
     PreprintView,
     PreprintDownload,
@@ -87,8 +86,7 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str):
         _es8_recordtype(**_convert_kwargs(_hit["_source"]))
         for _hit in _es6_scan_all(_es6_recordtype)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(_es8_recordtype, _each_new)
+    return _es8_bulk_save(_es8_recordtype, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -103,8 +101,7 @@ def migrate_counted_usages(from_when: str, until_when: str):
             addl_filter={"exists": {"field": "item_guid"}},
         )
     )
-    _debug_migrate(_each_new)
-    #return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -115,8 +112,7 @@ def migrate_preprint_views(from_when: str, until_when: str):
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintView, from_when, until_when)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -127,32 +123,31 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
         _convert_preprint_metric(_hit["_source"], _action_labels)
         for _hit in _es6_scan_range(PreprintDownload, from_when, until_when)
     )
-    _debug_migrate(_each_new)
-    # return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_usage_reports(osfid: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
-    # add cumulative count
     def _each_new():
+        # go in sorted order to build cumulative counts
+        # (only a few dozen of these per item; should be fine to sort and load all at once)
         _each_hit = _es6_scan_all(
             es6_reports.PublicItemUsageReport,
-            query_body={"query": {"term": {"item_osfid": osfid}}},
-        )
-        # only a few dozen of these per item; fine to hold all at once
-        _sorted_sources = sorted(
-            (_hit["_source"] for _hit in _each_hit),
-            key=lambda _s: _s["report_yearmonth"],
+            query_body={
+                "query": {"term": {"item_osfid": osfid}},
+                "sort": "report_yearmonth",
+            },
         )
         _prior_report = None
-        for _source in _sorted_sources:
+        for _hit in list(_each_hit):
             yield (
-                _prior_report := _convert_public_usage_report(_source, _prior_report)
+                _prior_report := _convert_public_usage_report(
+                    _hit["_source"], _prior_report
+                )
             )
 
-    _debug_migrate(_each_new())
-    # return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new)
+    return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new())
 
 
 ###
@@ -332,13 +327,12 @@ def _each_kwarg():
 
 def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
     _item_iri = _iri_from_osfid(source["item_guid"])
-    _item_type = _convert_item_type(source)
     return es8_metrics.OsfCountedUsageRecord(
         # fields from djelme.CountedUsageRecord:
         timestamp=source["timestamp"],
         sessionhour_id=source["session_id"],
         platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
-        database_iri=_convert_database_iri(source.get("provider_id"), _item_type),
+        database_iri=_convert_database_iri(source.get("provider_id"), source.get("item_type")),
         item_iri=_item_iri,
         within_iris=[
             _iri_from_osfid(_within_osfid)
@@ -346,11 +340,11 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
         ],
         # fields from OsfCountedUsageRecord:
         item_osfid=source["item_guid"],
-        item_type=_item_type,
-        item_public=source["item_public"],
+        item_type=source.get("item_type", "osf:Object"),
+        item_public=source.get("item_public"),
         provider_id=source.get("provider_id"),
-        user_is_authenticated=source["user_is_authenticated"],
-        action_labels=source["action_labels"],
+        user_is_authenticated=source.get("user_is_authenticated"),
+        action_labels=source.get("action_labels"),
         # TODO: does this need the PageviewInfo object or is the dictionary fine?
         pageview_info=source.get("pageview_info"),
     )
@@ -367,12 +361,12 @@ def _convert_preprint_metric(
         user_id=source.get("user_id"),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
-        database_iri=_convert_database_iri(source.get("provider_id"), OSF.Preprint),
+        database_iri=_convert_database_iri(source.get("provider_id"), "preprint"),
         item_iri=_preprint_iri,
         within_iris=[_preprint_iri],
         # fields from OsfCountedUsageRecord:
         item_osfid=source["preprint_id"],
-        item_type=OSF.Preprint,
+        item_type="preprint",
         item_public=True,
         provider_id=source.get("provider_id"),
         user_is_authenticated=bool(source.get("user_id")),
@@ -402,7 +396,7 @@ def _convert_public_usage_report(
             "download_session_count", 0
         )
     return es8_metrics.PublicItemUsageReportEs8(
-        cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
+        cycle_coverage=_semverish_from_yearmonth(source["report_yearmonth"]),
         item_osfid=source["item_osfid"],
         item_type=source.get("item_type"),
         provider_id=source.get("provider_id"),
@@ -518,50 +512,19 @@ def _iri_from_osfid(osfid: str) -> str:
     return f"{website_settings.DOMAIN}{osfid}"
 
 
-def _convert_item_type(es6_usage_dict):
-    """convert model-name item types to OSFMAP item types
-
-    previous item_types use `type(osf_model).__name__.lower()`
-    """
-    _modelname = es6_usage_dict.get("item_type")
-    match _modelname:
-        case "" | None:
-            return OSF.Object
-        case "osfuser":
-            return DCTERMS.Agent
-        case "preprint":
-            return OSF.Preprint
-        case "registration":
-            return (
-                OSF.RegistrationComponent
-                if es6_usage_dict.get("surrounding_guids")
-                else OSF.Registration
-            )
-        case "node":
-            return (
-                OSF.ProjectComponent
-                if es6_usage_dict.get("surrounding_guids")
-                else OSF.Project
-            )
-        case _ if "file" in _modelname:  # hack for the many "filenode" models
-            return OSF.File
-        case _:  # give up gracefully
-            return OSF.Object
-
-
 @functools.lru_cache
-def _convert_database_iri(provider_id: str | None, item_type_iri: str) -> str:
+def _convert_database_iri(provider_id: str | None, item_type: str) -> str:
     if not provider_id:
         return website_settings.DOMAIN  # osf is a provider, sure why not
 
     def _fallback_iri():
         return f"urn:osf.io:{provider_id}"
 
-    match item_type_iri:
-        case OSF.ProjectComponent | OSF.Project | DCTERMS.Agent:
+    match item_type:  # lower-cased osf.models class names
+        case "node" | "osfuser":
             # implicit "osf" provider
             return website_settings.DOMAIN
-        case OSF.Preprint:
+        case "preprint":
             try:
                 _provider = osfdb.PreprintProvider.objects.get(_id=provider_id)
             except osfdb.PreprintProvider.DoesNotExist:
@@ -569,7 +532,7 @@ def _fallback_iri():
                 return _fallback_iri()
             else:
                 return _provider.get_semantic_iri()
-        case OSF.RegistrationComponent | OSF.Registration:
+        case "registration":
             try:
                 _provider = osfdb.RegistrationProvider.objects.get(_id=provider_id)
             except osfdb.RegistrationProvider.DoesNotExist:
@@ -577,12 +540,12 @@ def _fallback_iri():
                 return _fallback_iri()
             else:
                 return _provider.get_semantic_iri()
-        case OSF.File:
+        case _ if "file" in item_type:
             # file providers are a different thing that don't really have an iri, just an id
-            return _fallback_iri()
+            return f"urn:files.osf.io:{provider_id}"
         case _:  # give up gracefully
             _logger.error(
-                f"unknown item type {item_type_iri!r} with provider {provider_id!r}"
+                f"unknown item type {item_type!r} with provider {provider_id!r}"
             )
             return _fallback_iri()
 
@@ -612,6 +575,10 @@ def add_arguments(self, parser):
             "--no-setup",
             action="store_true",
         )
+        parser.add_argument(
+            "--no-counts",
+            action="store_true",
+        )
         parser.add_argument(
             "--clear-state",
             action="store_true",
@@ -636,12 +603,13 @@ def add_arguments(self, parser):
     def handle(
         self,
         *,
+        no_setup,
+        no_counts,
+        clear_state,
         start,
         unchanged,
         usage_events,
         usage_reports,
-        clear_state,
-        no_setup,
         **kwargs,
     ):
         self._quiet_chatty_loggers()
@@ -652,99 +620,94 @@ def handle(
         self._check_started_at(start_now=start)
         _default_all = not any((unchanged, usage_events, usage_reports))
         if unchanged or _default_all:
-            self._handle_unchanged(start=start)
+            self._handle_unchanged(start=start, no_counts=no_counts)
         if usage_events or _default_all:
-            self._handle_usage_events(start=start)
+            self._handle_usage_events(start=start, no_counts=no_counts)
         if usage_reports or _default_all:
-            self._handle_usage_reports(start=start)
+            self._handle_usage_reports(start=start, no_counts=no_counts)
 
-    def _handle_unchanged(self, *, start: bool):
+    def _handle_unchanged(self, *, start: bool, no_counts: bool):
         # for each (unchanged) report/event:
         for _es6_cls, _es8_cls in _UNCHANGED_RECORDTYPES.items():
-            # display counts
-            _es6_count = _es6_cls.search().count()
-            _es8_count = _es8_cls.search().count()
-            self._write_tabbed("es6", _es6_cls, _es6_count)
+            if not no_counts:
+                # display counts
+                _es6_count = _es6_cls.search().count()
+                _es8_count = _es8_cls.search().count()
+                self._write_tabbed("es6", _es6_cls, _es6_count)
+                self._write_tabbed(
+                    "es8",
+                    _es8_cls,
+                    _es8_count,
+                    style=self._eq_style(_es8_count, _es6_count),
+                )
+            if start:  # schedule task
+                self.stdout.write(f"starting {_es6_cls.__name__} => {_es8_cls.__name__}")
+                migrate_unchanged_recordtype.delay(_es6_cls.__name__)
+
+    def _handle_usage_events(self, *, start: bool, no_counts: bool):
+        # for counted-usage events:
+        _started = self._migration_started_at
+        _range_start = (_started - datetime.timedelta(days=_USAGE_DAYS_BACK)).date()
+        _range_end = _started.date() + datetime.timedelta(days=1)
+        if not no_counts:
+            # display counts for each view/download event type
+            _range_q = {"range": {"timestamp": {"gte": _range_start.isoformat(), "lt": _range_end.isoformat()}}}
+            _es6_pview_count = PreprintView.search().filter(_range_q).count()
+            _es6_pdownload_count = PreprintDownload.search().filter(_range_q).count()
+            _es6_usage_event_count = CountedUsageEs6.search().filter(_range_q).count()
+            _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
+            _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
+            self._write_tabbed("es6", PreprintView, _es6_pview_count)
+            self._write_tabbed("es6", PreprintDownload, _es6_pdownload_count)
+            self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
+            self._write_tabbed("es6", f"(total between {_range_start} and {_range_end})", _es6_count)
             self._write_tabbed(
                 "es8",
-                _es8_cls,
+                es8_metrics.OsfCountedUsageRecord,
                 _es8_count,
                 style=self._eq_style(_es8_count, _es6_count),
             )
-            if start:  # schedule task
-                self._write_tabbed("starting", _es6_cls, "=>", _es8_cls)
-                #migrate_unchanged_recordtype.delay(_es6_cls.__name__)
-                migrate_unchanged_recordtype(_es6_cls.__name__)
-            self.stdout.write("---")
-
-    def _handle_usage_events(self, *, start: bool):
-        # for counted-usage events:
-        # TODO: last X months only
-        # display counts for each view/download event type
-        _es6_pview_count = PreprintView.search().count()
-        _es6_pdownload_count = PreprintDownload.search().count()
-        _es6_usage_event_count = CountedUsageEs6.search().count()
-        _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
-        _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
-        self._write_tabbed("es6", PreprintView, _es6_pview_count)
-        self._write_tabbed("es6", PreprintDownload, _es6_pdownload_count)
-        self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
-        self._write_tabbed("es6", "(total to migrate)", _es6_count)
-        self._write_tabbed(
-            "es8",
-            es8_metrics.OsfCountedUsageRecord,
-            _es8_count,
-            style=self._eq_style(_es8_count, _es6_count),
-        )
         if start:  # schedule (per-day?) tasks (if --start)
-            self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord}")
-            _started = self._migration_started_at
-            _range_start = (_started - datetime.timedelta(days=_USAGE_DAYS_BACK)).date()
-            _range_end = _started.date() + datetime.timedelta(days=1)
+            self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}")
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
                 _until_str = _until_date.isoformat()
-                # migrate_counted_usages.delay(_from_str, _until_str)
-                # migrate_preprint_views.delay(_from_str, _until_str)
-                # migrate_preprint_downloads.delay(_from_str, _until_str)
-                migrate_counted_usages(_from_str, _until_str)
-                migrate_preprint_views(_from_str, _until_str)
-                migrate_preprint_downloads(_from_str, _until_str)
-        self.stdout.write("---")
-
-    def _handle_usage_reports(self, *, start: bool):
-        # display counts of reports and distinct items
-        _es6_count, _es6_item_count = _es6_usage_report_counts()
-        _es8_count, _es8_item_count = _es8_usage_report_counts()
-        self._write_tabbed("es6", es6_reports.PublicItemUsageReport, _es6_count)
-        self._write_tabbed(
-            "es8",
-            es8_metrics.PublicItemUsageReportEs8,
-            _es8_count,
-            style=self._eq_style(_es8_count, _es6_count),
-        )
-        self._write_tabbed(
-            "es6", es6_reports.PublicItemUsageReport, "(items)", _es6_item_count
-        )
-        self._write_tabbed(
-            "es8",
-            es8_metrics.PublicItemUsageReportEs8,
-            "(items)",
-            _es8_item_count,
-            style=self._eq_style(_es8_item_count, _es6_item_count),
-        )
+                migrate_counted_usages.delay(_from_str, _until_str)
+                migrate_preprint_views.delay(_from_str, _until_str)
+                migrate_preprint_downloads.delay(_from_str, _until_str)
+
+    def _handle_usage_reports(self, *, start: bool, no_counts: bool):
+        if not no_counts:
+            # display counts of reports and distinct items
+            _es6_count, _es6_item_count = _es6_usage_report_counts()
+            _es8_count, _es8_item_count = _es8_usage_report_counts()
+            self._write_tabbed("es6", es6_reports.PublicItemUsageReport, _es6_count)
+            self._write_tabbed(
+                "es8",
+                es8_metrics.PublicItemUsageReportEs8,
+                _es8_count,
+                style=self._eq_style(_es8_count, _es6_count),
+            )
+            self._write_tabbed(
+                "es6", es6_reports.PublicItemUsageReport, "osfid count:", _es6_item_count
+            )
+            self._write_tabbed(
+                "es8",
+                es8_metrics.PublicItemUsageReportEs8,
+                "(items)",
+                _es8_item_count,
+                style=self._eq_style(_es8_item_count, _es6_item_count),
+            )
         # (if --start) schedule task per item (by composite agg on es6 public usage reports)
         # each item-task iter thru reports oldest to newest, adding cumulative counts
         if start:
             self.stdout.write(
-                f"starting per-item {es6_reports.PublicItemUsageReport} => {es8_metrics.PublicItemUsageReportEs8}"
+                f"starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.PublicItemUsageReportEs8.__name__}"
             )
             for _osfid in _each_usage_report_osfid(
                 started_at=self._migration_started_at
             ):
-                migrate_usage_reports(_osfid)
-                # TODO: migrate_usage_reports.delay(...)
-        self.stdout.write("---")
+                migrate_usage_reports.delay(_osfid)
 
     @functools.cached_property
     def _migration_started_at(self):
@@ -757,8 +720,8 @@ def _check_started_at(self, start_now):
                 f"osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}"
             )
         elif start_now:
-            del self._migration_started_at  # clear cache
             _started_at = es8_metrics.Elastic6To8State.set_started_at_now()
+            del self._migration_started_at  # clear cache
             self.stdout.write(
                 f"osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}"
             )
@@ -766,7 +729,6 @@ def _check_started_at(self, start_now):
             self.stdout.write(
                 "osf.metrics 6->8 migration not started nor starting (run with `--start` to start)"
             )
-        self.stdout.write("---")
 
     def _clear_state(self):
         self.stdout.write(
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 67fee676112..2f4023105d8 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -162,7 +162,7 @@ class RegistriesModerationMetricsEs8(djelme.EventRecord):
     from_state: str
     to_state: str
     user_id: str
-    comment: str
+    comment: str | None
 
     class Index:
         settings = {
@@ -171,6 +171,9 @@ class Index:
             'refresh_interval': '1s',
         }
 
+    class Meta:
+        timeseries_recordtype_name = 'RegistriesModerationMetrics'
+
 
 ###
 # Reusable inner objects for reports
@@ -235,6 +238,7 @@ class StorageAddonUsageEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'StorageAddonUsage'
 
 
 class DownloadCountReportEs8(djelme.CyclicRecord):
@@ -244,6 +248,7 @@ class DownloadCountReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'DownloadCountReport'
 
 
 class InstitutionSummaryReportEs8(djelme.CyclicRecord):
@@ -260,6 +265,7 @@ class InstitutionSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = MONTHLY
+        timeseries_recordtype_name = 'InstitutionSummaryReport'
 
 
 class NewUserDomainReportEs8(djelme.CyclicRecord):
@@ -271,6 +277,7 @@ class NewUserDomainReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = MONTHLY
+        timeseries_recordtype_name = 'NewUserDomainReport'
 
 
 class NodeSummaryReportEs8(djelme.CyclicRecord):
@@ -283,6 +290,7 @@ class NodeSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'NodeSummaryReport'
 
 
 class OsfstorageFileCountReportEs8(djelme.CyclicRecord):
@@ -292,6 +300,7 @@ class OsfstorageFileCountReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'OsfstorageFileCountReport'
 
 
 class PreprintSummaryReportEs8(djelme.CyclicRecord):
@@ -303,6 +312,7 @@ class PreprintSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = MONTHLY
+        timeseries_recordtype_name = 'PreprintSummaryReport'
 
 
 class UserSummaryReportEs8(djelme.CyclicRecord):
@@ -317,6 +327,7 @@ class UserSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'UserSummaryReport'
 
 
 class SpamSummaryReportEs8(djelme.CyclicRecord):
@@ -336,6 +347,7 @@ class SpamSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'SpamSummaryReport'
 
 
 class InstitutionalUserReportEs8(djelme.CyclicRecord):
@@ -350,7 +362,7 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
     month_last_login = YearmonthField()
     month_last_active = YearmonthField()
     account_creation_date = YearmonthField()
-    orcid_id: str
+    orcid_id: str | None
     # counts:
     public_project_count: int
     private_project_count: int
@@ -362,6 +374,7 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = MONTHLY
+        timeseries_recordtype_name = 'InstitutionalUserReport'
 
 
 class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
@@ -382,6 +395,7 @@ class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'InstitutionMonthlySummaryReport'
 
 
 class PublicItemUsageReportEs8(djelme.CyclicRecord):
@@ -410,6 +424,7 @@ class PublicItemUsageReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = MONTHLY
+        timeseries_recordtype_name = 'PublicItemUsageReport'
 
 
 class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
@@ -426,6 +441,7 @@ class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
+        timeseries_recordtype_name = 'PrivateSpamMetricsReport'
 
 
 ###
@@ -440,9 +456,6 @@ class Elastic6To8State(djelme.SimpleRecord):
         default_factory=lambda: datetime.datetime.now(datetime.UTC),
     )
 
-    class Index:
-        name = 'osf_elastic6to8state'
-
     @classmethod
     def get_by_key(cls, key: str):
         _response = cls.search().query({'term': {'key': key}})[0].execute()
@@ -460,4 +473,5 @@ def get_started_at(cls):
     @classmethod
     def set_started_at_now(cls):
         _record = cls.record(key='started_at')
+        cls.refresh()
         return _record.timestamp
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index dc69126ca37..2d174472576 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -608,6 +608,7 @@ class CeleryConfig:
         'scripts.remove_after_use.merge_notification_subscription_provider_ct',
         'scripts.disable_removed_beat_tasks',
         'osf.management.commands.delete_withdrawn_or_failed_registration_files',
+        'osf.management.commands.migrate_osfmetrics_6to8',
     )
 
     # Modules that need metrics and release requirements

From 95b42e600b11fb2b4f8e51dd6e80b53756575bab Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 12:48:39 -0400
Subject: [PATCH 054/100] wip

---
 docker-compose.yml                            |   5 +
 .../commands/migrate_osfmetrics_6to8.py       | 147 +++++++++---------
 poetry.lock                                   |   6 +-
 pyproject.toml                                |   2 +-
 4 files changed, 84 insertions(+), 76 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index d771c75797a..04d64c51fda 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -78,6 +78,11 @@ services:
       - 9201:9200
     volumes:
       - elasticsearch6_data_vol:/usr/share/elasticsearch/data
+    healthcheck:
+      start_period: 15s
+      test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'
+      interval: 10s
+      retries: 30
     stdin_open: true
 
   elasticsearch8:
diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 5ee937e80c0..5ce383b99bc 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -2,7 +2,6 @@
 import datetime
 import functools
 import logging
-from pprint import pprint
 
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
@@ -11,7 +10,6 @@
 from elasticsearch6 import helpers as es6_helpers
 from elasticsearch6_dsl.connections import connections as es6_connections
 from elasticsearch8.exceptions import ConnectionError as Elastic8ConnectionError
-from elasticsearch8.dsl.connections import connections as es8_connections
 from elasticsearch_metrics.registry import djelme_registry
 from elasticsearch_metrics.imps import elastic8 as djel8me
 from psycopg2 import OperationalError as PostgresOperationalError
@@ -73,10 +71,9 @@
 
 
 @celery_app.task(**_TASK_KWARGS)
-def migrate_unchanged_recordtype(es6_recordtype_name: str):
+def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
     _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
     _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
-    _assert_field_unchangedness(_es6_recordtype, _es8_recordtype)
     _convert_kwargs = (
         _convert_unchanged_cyclicrecord_kwargs
         if issubclass(_es8_recordtype, djel8me.CyclicRecord)
@@ -84,7 +81,7 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str):
     )
     _each_new = (
         _es8_recordtype(**_convert_kwargs(_hit["_source"]))
-        for _hit in _es6_scan_all(_es6_recordtype)
+        for _hit in _es6_scan_range(_es6_recordtype, until_when=until_when)
     )
     return _es8_bulk_save(_es8_recordtype, _each_new)
 
@@ -96,8 +93,8 @@ def migrate_counted_usages(from_when: str, until_when: str):
         _convert_counted_usage(_hit["_source"])
         for _hit in _es6_scan_range(
             CountedUsageEs6,
-            from_when,
-            until_when,
+            from_when=from_when,
+            until_when=until_when,
             addl_filter={"exists": {"field": "item_guid"}},
         )
     )
@@ -110,7 +107,9 @@ def migrate_preprint_views(from_when: str, until_when: str):
     _action_labels = ["view", "web"]
     _each_new = (
         _convert_preprint_metric(_hit["_source"], _action_labels)
-        for _hit in _es6_scan_range(PreprintView, from_when, until_when)
+        for _hit in _es6_scan_range(
+            PreprintView, from_when=from_when, until_when=until_when
+        )
     )
     return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
@@ -121,23 +120,24 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
     _action_labels = ["download"]
     _each_new = (
         _convert_preprint_metric(_hit["_source"], _action_labels)
-        for _hit in _es6_scan_range(PreprintDownload, from_when, until_when)
+        for _hit in _es6_scan_range(
+            PreprintDownload, from_when=from_when, until_when=until_when
+        )
     )
     return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
-def migrate_usage_reports(osfid: str):
+def migrate_usage_reports(osfid: str, until_when: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
     def _each_new():
         # go in sorted order to build cumulative counts
         # (only a few dozen of these per item; should be fine to sort and load all at once)
-        _each_hit = _es6_scan_all(
+        _each_hit = _es6_scan_range(
             es6_reports.PublicItemUsageReport,
-            query_body={
-                "query": {"term": {"item_osfid": osfid}},
-                "sort": "report_yearmonth",
-            },
+            until_when=until_when,
+            addl_filter={"term": {"item_osfid": osfid}},
+            sort="report_yearmonth",
         )
         _prior_report = None
         for _hit in list(_each_hit):
@@ -158,31 +158,6 @@ def _es6_connection():
     return es6_connections.get_connection("osfmetrics_es6")
 
 
-def _es8_connection():
-    return es8_connections.get_connection("osfmetrics_es8")
-
-
-def _delete_all(recordtype):
-    # TODO: REMOVE THIS
-    recordtype.search().query({"match_all": {}}).delete()
-    recordtype.refresh()
-
-
-def _delete_all_es8():
-    # TODO: REMOVE THIS
-    for _es8_recordtype in _UNCHANGED_RECORDTYPES.values():
-        _delete_all(_es8_recordtype)
-    _delete_all(es8_metrics.PublicItemUsageReportEs8)
-    _delete_all(es8_metrics.OsfCountedUsageRecord)
-
-
-def _debug_migrate(each_new):
-    # TODO: remove this
-    for _each in each_new:
-        _each.full_clean()
-        pprint(_each.to_dict(include_meta=True))
-
-
 def _es8_bulk_save(es8_recordtype, each_new_record):
     _success_count, _fail_count = es8_recordtype.bulk(
         each_new_record,
@@ -203,24 +178,29 @@ def _date_range(
         (_from_date, _until_date) = (_until_date, _until_date + step)
 
 
-def _es6_scan_all(es6_recordtype, query_body=None):
-    return es6_helpers.scan(
-        _es6_connection(),
-        index=es6_recordtype._template_pattern,
-        query=query_body,
-    )
-
-
-def _es6_scan_range(es6_recordtype, from_when: str, until_when: str, addl_filter=None):
+def _es6_scan_range(
+    es6_recordtype,
+    *,
+    from_when: str = "",
+    until_when: str,
+    addl_filter=None,
+    sort=None,
+):
+    _timestamp_range = {"lt": until_when}
+    if from_when:
+        _timestamp_range["gte"] = from_when
     _filters = [
-        {"range": {"timestamp": {"gte": from_when, "lt": until_when}}},
+        {"range": {"timestamp": _timestamp_range}},
     ]
     if addl_filter:
         _filters.append(addl_filter)
+    _query_body = {"query": {"bool": {"filter": _filters}}}
+    if sort:
+        _query_body["sort"] = sort
     return es6_helpers.scan(
         _es6_connection(),
         index=es6_recordtype._template_pattern,
-        query={"query": {"bool": {"filter": _filters}}},
+        query=_query_body,
     )
 
 
@@ -332,7 +312,9 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
         timestamp=source["timestamp"],
         sessionhour_id=source["session_id"],
         platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
-        database_iri=_convert_database_iri(source.get("provider_id"), source.get("item_type")),
+        database_iri=_convert_database_iri(
+            source.get("provider_id"), source.get("item_type")
+        ),
         item_iri=_item_iri,
         within_iris=[
             _iri_from_osfid(_within_osfid)
@@ -345,7 +327,6 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
         provider_id=source.get("provider_id"),
         user_is_authenticated=source.get("user_is_authenticated"),
         action_labels=source.get("action_labels"),
-        # TODO: does this need the PageviewInfo object or is the dictionary fine?
         pageview_info=source.get("pageview_info"),
     )
 
@@ -550,10 +531,10 @@ def _fallback_iri():
             return _fallback_iri()
 
 
-def _each_usage_report_osfid(started_at, after_osfid=None):
+def _each_usage_report_osfid(until_when, after_osfid=None):
     _search = (
         es6_reports.PublicItemUsageReport.search()
-        .filter("range", timestamp={"lt": started_at})
+        .filter("range", timestamp={"lt": until_when})
         .extra(size=0)
     )
     _search.aggs.bucket(
@@ -600,6 +581,10 @@ def add_arguments(self, parser):
             action="store_true",
         )
 
+    @functools.cached_property
+    def _migration_started_at(self):
+        return es8_metrics.Elastic6To8State.get_started_at()
+
     def handle(
         self,
         *,
@@ -625,10 +610,13 @@ def handle(
             self._handle_usage_events(start=start, no_counts=no_counts)
         if usage_reports or _default_all:
             self._handle_usage_reports(start=start, no_counts=no_counts)
+        if not no_counts:
+            self.stdout.write("(counts may be approximate)")
 
     def _handle_unchanged(self, *, start: bool, no_counts: bool):
         # for each (unchanged) report/event:
         for _es6_cls, _es8_cls in _UNCHANGED_RECORDTYPES.items():
+            _assert_field_unchangedness(_es6_cls, _es8_cls)
             if not no_counts:
                 # display counts
                 _es6_count = _es6_cls.search().count()
@@ -641,26 +629,41 @@ def _handle_unchanged(self, *, start: bool, no_counts: bool):
                     style=self._eq_style(_es8_count, _es6_count),
                 )
             if start:  # schedule task
-                self.stdout.write(f"starting {_es6_cls.__name__} => {_es8_cls.__name__}")
-                migrate_unchanged_recordtype.delay(_es6_cls.__name__)
+                self.stdout.write(
+                    f"starting {_es6_cls.__name__} => {_es8_cls.__name__}"
+                )
+                migrate_unchanged_recordtype.delay(
+                    _es6_cls.__name__, self._migration_started_at.isoformat()
+                )
 
     def _handle_usage_events(self, *, start: bool, no_counts: bool):
         # for counted-usage events:
-        _started = self._migration_started_at
+        _started = self._migration_started_at or datetime.datetime.now()
         _range_start = (_started - datetime.timedelta(days=_USAGE_DAYS_BACK)).date()
         _range_end = _started.date() + datetime.timedelta(days=1)
         if not no_counts:
             # display counts for each view/download event type
-            _range_q = {"range": {"timestamp": {"gte": _range_start.isoformat(), "lt": _range_end.isoformat()}}}
+            _range_q = {
+                "range": {
+                    "timestamp": {
+                        "gte": _range_start.isoformat(),
+                        "lt": _range_end.isoformat(),
+                    }
+                }
+            }
             _es6_pview_count = PreprintView.search().filter(_range_q).count()
             _es6_pdownload_count = PreprintDownload.search().filter(_range_q).count()
             _es6_usage_event_count = CountedUsageEs6.search().filter(_range_q).count()
-            _es6_count = _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
+            _es6_count = (
+                _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
+            )
             _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
             self._write_tabbed("es6", PreprintView, _es6_pview_count)
             self._write_tabbed("es6", PreprintDownload, _es6_pdownload_count)
             self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
-            self._write_tabbed("es6", f"(total between {_range_start} and {_range_end})", _es6_count)
+            self._write_tabbed(
+                "es6", f"(total between {_range_start} and {_range_end})", _es6_count
+            )
             self._write_tabbed(
                 "es8",
                 es8_metrics.OsfCountedUsageRecord,
@@ -668,7 +671,9 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
                 style=self._eq_style(_es8_count, _es6_count),
             )
         if start:  # schedule (per-day?) tasks (if --start)
-            self.stdout.write(f"starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}")
+            self.stdout.write(
+                f"starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}"
+            )
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
                 _until_str = _until_date.isoformat()
@@ -689,7 +694,10 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
                 style=self._eq_style(_es8_count, _es6_count),
             )
             self._write_tabbed(
-                "es6", es6_reports.PublicItemUsageReport, "osfid count:", _es6_item_count
+                "es6",
+                es6_reports.PublicItemUsageReport,
+                "osfid count:",
+                _es6_item_count,
             )
             self._write_tabbed(
                 "es8",
@@ -705,13 +713,11 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
                 f"starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.PublicItemUsageReportEs8.__name__}"
             )
             for _osfid in _each_usage_report_osfid(
-                started_at=self._migration_started_at
+                until_when=self._migration_started_at
             ):
-                migrate_usage_reports.delay(_osfid)
-
-    @functools.cached_property
-    def _migration_started_at(self):
-        return es8_metrics.Elastic6To8State.get_started_at()
+                migrate_usage_reports.delay(
+                    _osfid, self._migration_started_at.isoformat()
+                )
 
     def _check_started_at(self, start_now):
         _started_at = self._migration_started_at
@@ -736,9 +742,6 @@ def _clear_state(self):
         )
         es8_metrics.Elastic6To8State.search().query({"match_all": {}}).delete()
         es8_metrics.Elastic6To8State.refresh()
-        # TODO: REMOVE THIS
-        self.stdout.write("deleting all migration target data in es8", self.style.ERROR)
-        _delete_all_es8()
 
     def _eq_style(self, num: int, should_be: int):
         return self.style.SUCCESS if (num == should_be) else self.style.WARNING
diff --git a/poetry.lock b/poetry.lock
index 7aee4eca49f..1aec6afa426 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "fed3c14f213642284a197ac2933106cdafede25b"
-resolved_reference = "fed3c14f213642284a197ac2933106cdafede25b"
+reference = "34c7b180e6d595b3374534cd50efb00f5a809582"
+resolved_reference = "34c7b180e6d595b3374534cd50efb00f5a809582"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "0f9c547a6309aa915b25f9a7a98e5d0c15c867d577a883547d894ca173cb2344"
+content-hash = "9edb43576b960885c14e32e9ae74218c28d883df48679868848dbaa5780c4b12"
diff --git a/pyproject.toml b/pyproject.toml
index b04e0540d90..815efdd61a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "fed3c14f213642284a197ac2933106cdafede25b"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "34c7b180e6d595b3374534cd50efb00f5a809582"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From bac21a0ba0d2973d5da7fdc1718cd13b5724f15c Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 13:23:15 -0400
Subject: [PATCH 055/100] chore: "fix' quotes

---
 .../commands/migrate_osfmetrics_6to8.py       | 338 +++++++++---------
 1 file changed, 169 insertions(+), 169 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 5ce383b99bc..ccc15834644 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -72,7 +72,7 @@
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
-    _es6_recordtype = djelme_registry.get_recordtype("osf", es6_recordtype_name)
+    _es6_recordtype = djelme_registry.get_recordtype('osf', es6_recordtype_name)
     _es8_recordtype = _UNCHANGED_RECORDTYPES[_es6_recordtype]
     _convert_kwargs = (
         _convert_unchanged_cyclicrecord_kwargs
@@ -80,7 +80,7 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
         else (lambda _kw: _kw)  # no conversion needed for event record
     )
     _each_new = (
-        _es8_recordtype(**_convert_kwargs(_hit["_source"]))
+        _es8_recordtype(**_convert_kwargs(_hit['_source']))
         for _hit in _es6_scan_range(_es6_recordtype, until_when=until_when)
     )
     return _es8_bulk_save(_es8_recordtype, _each_new)
@@ -90,12 +90,12 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
 def migrate_counted_usages(from_when: str, until_when: str):
     # CountedAuthUsage => OsfCountedUsageRecord
     _each_new = (
-        _convert_counted_usage(_hit["_source"])
+        _convert_counted_usage(_hit['_source'])
         for _hit in _es6_scan_range(
             CountedUsageEs6,
             from_when=from_when,
             until_when=until_when,
-            addl_filter={"exists": {"field": "item_guid"}},
+            addl_filter={'exists': {'field': 'item_guid'}},
         )
     )
     return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
@@ -104,9 +104,9 @@ def migrate_counted_usages(from_when: str, until_when: str):
 @celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_views(from_when: str, until_when: str):
     # PreprintView => OsfCountedUsageRecord
-    _action_labels = ["view", "web"]
+    _action_labels = ['view', 'web']
     _each_new = (
-        _convert_preprint_metric(_hit["_source"], _action_labels)
+        _convert_preprint_metric(_hit['_source'], _action_labels)
         for _hit in _es6_scan_range(
             PreprintView, from_when=from_when, until_when=until_when
         )
@@ -117,9 +117,9 @@ def migrate_preprint_views(from_when: str, until_when: str):
 @celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_downloads(from_when: str, until_when: str):
     # PreprintDownload => OsfCountedUsageRecord
-    _action_labels = ["download"]
+    _action_labels = ['download']
     _each_new = (
-        _convert_preprint_metric(_hit["_source"], _action_labels)
+        _convert_preprint_metric(_hit['_source'], _action_labels)
         for _hit in _es6_scan_range(
             PreprintDownload, from_when=from_when, until_when=until_when
         )
@@ -136,14 +136,14 @@ def _each_new():
         _each_hit = _es6_scan_range(
             es6_reports.PublicItemUsageReport,
             until_when=until_when,
-            addl_filter={"term": {"item_osfid": osfid}},
-            sort="report_yearmonth",
+            addl_filter={'term': {'item_osfid': osfid}},
+            sort='report_yearmonth',
         )
         _prior_report = None
         for _hit in list(_each_hit):
             yield (
                 _prior_report := _convert_public_usage_report(
-                    _hit["_source"], _prior_report
+                    _hit['_source'], _prior_report
                 )
             )
 
@@ -155,7 +155,7 @@ def _each_new():
 
 
 def _es6_connection():
-    return es6_connections.get_connection("osfmetrics_es6")
+    return es6_connections.get_connection('osfmetrics_es6')
 
 
 def _es8_bulk_save(es8_recordtype, each_new_record):
@@ -181,22 +181,22 @@ def _date_range(
 def _es6_scan_range(
     es6_recordtype,
     *,
-    from_when: str = "",
+    from_when: str = '',
     until_when: str,
     addl_filter=None,
     sort=None,
 ):
-    _timestamp_range = {"lt": until_when}
+    _timestamp_range = {'lt': until_when}
     if from_when:
-        _timestamp_range["gte"] = from_when
+        _timestamp_range['gte'] = from_when
     _filters = [
-        {"range": {"timestamp": _timestamp_range}},
+        {'range': {'timestamp': _timestamp_range}},
     ]
     if addl_filter:
         _filters.append(addl_filter)
-    _query_body = {"query": {"bool": {"filter": _filters}}}
+    _query_body = {'query': {'bool': {'filter': _filters}}}
     if sort:
-        _query_body["sort"] = sort
+        _query_body['sort'] = sort
     return es6_helpers.scan(
         _es6_connection(),
         index=es6_recordtype._template_pattern,
@@ -207,16 +207,16 @@ def _es6_scan_range(
 def _es6_usage_report_counts() -> tuple[int, int]:
     _search = es6_reports.PublicItemUsageReport.search()
     _search.aggs.metric(
-        "agg_item_count",
-        "cardinality",
-        field="item_osfid",
+        'agg_item_count',
+        'cardinality',
+        field='item_osfid',
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _total_count = _response.hits.total
     _item_count = (
         _response.aggregations.agg_item_count.value
-        if "agg_item_count" in _response.aggregations
+        if 'agg_item_count' in _response.aggregations
         else 0
     )
     return (_total_count, _item_count)
@@ -225,30 +225,30 @@ def _es6_usage_report_counts() -> tuple[int, int]:
 def _es8_usage_report_counts() -> tuple[int, int]:
     _search = es8_metrics.PublicItemUsageReportEs8.search()
     _search.aggs.metric(
-        "agg_item_count",
-        "cardinality",
-        field="item_osfid",
+        'agg_item_count',
+        'cardinality',
+        field='item_osfid',
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _total_count = _response.hits.total.value
     _item_count = (
         _response.aggregations.agg_item_count.value
-        if "agg_item_count" in _response.aggregations
+        if 'agg_item_count' in _response.aggregations
         else 0
     )
     return (_total_count, _item_count)
 
 
 def _get_es6_field_names(es6_recordtype):
-    """
+    '''
     adapted from DocumentBase._get_field_names in elasticsearch8.dsl
-    """
+    '''
     for _field_name in es6_recordtype._doc_type.mapping:
         _field = es6_recordtype._doc_type.mapping[_field_name]
-        if hasattr(_field, "_doc_class"):
+        if hasattr(_field, '_doc_class'):
             for _sub_field in _get_es6_field_names(_field._doc_class):
-                yield f"{_field_name}.{_sub_field}"
+                yield f'{_field_name}.{_sub_field}'
         else:
             yield _field_name
 
@@ -260,20 +260,20 @@ def _assert_field_unchangedness(es6_recordtype, es8_recordtype):
     # remove fields intentionally removed in migration
     if issubclass(es6_recordtype, es6_reports.DailyReport):
         assert issubclass(es8_recordtype, djel8me.CyclicRecord)
-        _es6_fields.remove("timestamp")
-        _es6_fields.remove("report_date")
+        _es6_fields.remove('timestamp')
+        _es6_fields.remove('report_date')
     elif issubclass(es6_recordtype, es6_reports.MonthlyReport):
         assert issubclass(es8_recordtype, djel8me.CyclicRecord)
-        _es6_fields.remove("timestamp")
-        _es6_fields.remove("report_yearmonth")
+        _es6_fields.remove('timestamp')
+        _es6_fields.remove('report_yearmonth')
     else:
         assert issubclass(es8_recordtype, djel8me.EventRecord)
 
     # remove fields intentionally added in migration
-    _es8_fields.remove("timeseries_timeparts")
+    _es8_fields.remove('timeseries_timeparts')
     if issubclass(es8_recordtype, djel8me.CyclicRecord):
-        _es8_fields.remove("created")
-        _es8_fields.remove("cycle_coverage")
+        _es8_fields.remove('created')
+        _es8_fields.remove('cycle_coverage')
 
     # all remaining fields should match
     assert _es6_fields == _es8_fields
@@ -281,24 +281,24 @@ def _assert_field_unchangedness(es6_recordtype, es8_recordtype):
 
 def _semverish_from_yearmonth(given_yearmonth: str):
     _ym = YearMonth.from_str(given_yearmonth)
-    return f"{_ym.year}.{_ym.month}"
+    return f'{_ym.year}.{_ym.month}'
 
 
 def _semverish_from_date(given_date: str):
     _d = datetime.date.fromisoformat(given_date)
-    return f"{_d.year}.{_d.month}.{_d.day}"
+    return f'{_d.year}.{_d.month}.{_d.day}'
 
 
 def _convert_unchanged_cyclicrecord_kwargs(es6_source: dict) -> dict:
     def _each_kwarg():
         for _key, _val in es6_source.items():
-            if _key == "report_yearmonth":
+            if _key == 'report_yearmonth':
                 # report_yearmonth converts to cycle_coverage Y.M
-                yield ("cycle_coverage", _semverish_from_yearmonth(_val))
-            elif _key == "report_date":
+                yield ('cycle_coverage', _semverish_from_yearmonth(_val))
+            elif _key == 'report_date':
                 # report_date converts to cycle_coverage Y.M.D
-                yield ("cycle_coverage", _semverish_from_date(_val))
-            elif _key != "timestamp":
+                yield ('cycle_coverage', _semverish_from_date(_val))
+            elif _key != 'timestamp':
                 # skipping timestamp; on daily/monthly reports just copied from yearmonth/date
                 yield (_key, _val)
 
@@ -306,51 +306,51 @@ def _each_kwarg():
 
 
 def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
-    _item_iri = _iri_from_osfid(source["item_guid"])
+    _item_iri = _iri_from_osfid(source['item_guid'])
     return es8_metrics.OsfCountedUsageRecord(
         # fields from djelme.CountedUsageRecord:
-        timestamp=source["timestamp"],
-        sessionhour_id=source["session_id"],
-        platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
+        timestamp=source['timestamp'],
+        sessionhour_id=source['session_id'],
+        platform_iri=source.get('platform_iri') or website_settings.DOMAIN,
         database_iri=_convert_database_iri(
-            source.get("provider_id"), source.get("item_type")
+            source.get('provider_id'), source.get('item_type')
         ),
         item_iri=_item_iri,
         within_iris=[
             _iri_from_osfid(_within_osfid)
-            for _within_osfid in source.get("surrounding_guids", ())
+            for _within_osfid in source.get('surrounding_guids', ())
         ],
         # fields from OsfCountedUsageRecord:
-        item_osfid=source["item_guid"],
-        item_type=source.get("item_type", "osf:Object"),
-        item_public=source.get("item_public"),
-        provider_id=source.get("provider_id"),
-        user_is_authenticated=source.get("user_is_authenticated"),
-        action_labels=source.get("action_labels"),
-        pageview_info=source.get("pageview_info"),
+        item_osfid=source['item_guid'],
+        item_type=source.get('item_type', 'osf:Object'),
+        item_public=source.get('item_public'),
+        provider_id=source.get('provider_id'),
+        user_is_authenticated=source.get('user_is_authenticated'),
+        action_labels=source.get('action_labels'),
+        pageview_info=source.get('pageview_info'),
     )
 
 
 def _convert_preprint_metric(
     source: dict, action_labels: list[str]
 ) -> es8_metrics.OsfCountedUsageRecord:
-    _preprint_iri = _iri_from_osfid(source["preprint_id"])
+    _preprint_iri = _iri_from_osfid(source['preprint_id'])
     return es8_metrics.OsfCountedUsageRecord.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
-        timestamp=source["timestamp"],
-        user_id=source.get("user_id"),
+        timestamp=source['timestamp'],
+        user_id=source.get('user_id'),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
-        database_iri=_convert_database_iri(source.get("provider_id"), "preprint"),
+        database_iri=_convert_database_iri(source.get('provider_id'), 'preprint'),
         item_iri=_preprint_iri,
         within_iris=[_preprint_iri],
         # fields from OsfCountedUsageRecord:
-        item_osfid=source["preprint_id"],
-        item_type="preprint",
+        item_osfid=source['preprint_id'],
+        item_type='preprint',
         item_public=True,
-        provider_id=source.get("provider_id"),
-        user_is_authenticated=bool(source.get("user_id")),
+        provider_id=source.get('provider_id'),
+        user_is_authenticated=bool(source.get('user_id')),
         action_labels=action_labels,
     )
 
@@ -361,40 +361,40 @@ def _convert_public_usage_report(
 ) -> es8_metrics.PublicItemUsageReportEs8:
     if prior_report is None:
         _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
-            osfid=source["item_osfid"],
-            until_when=YearMonth.from_str(source["report_yearmonth"]).month_end(),
-            item_type=source.get("item_type"),
+            osfid=source['item_osfid'],
+            until_when=YearMonth.from_str(source['report_yearmonth']).month_end(),
+            item_type=source.get('item_type'),
         )
     else:
-        _c_views = prior_report.cumulative_view_count + source.get("view_count", 0)
+        _c_views = prior_report.cumulative_view_count + source.get('view_count', 0)
         _c_view_sess = prior_report.cumulative_view_session_count + source.get(
-            "view_session_count", 0
+            'view_session_count', 0
         )
         _c_downloads = prior_report.cumulative_download_count + source.get(
-            "download_count", 0
+            'download_count', 0
         )
         _c_download_sess = prior_report.cumulative_download_session_count + source.get(
-            "download_session_count", 0
+            'download_session_count', 0
         )
     return es8_metrics.PublicItemUsageReportEs8(
-        cycle_coverage=_semverish_from_yearmonth(source["report_yearmonth"]),
-        item_osfid=source["item_osfid"],
-        item_type=source.get("item_type"),
-        provider_id=source.get("provider_id"),
-        platform_iri=source.get("platform_iri") or website_settings.DOMAIN,
-        view_count=source.get("view_count"),
-        view_session_count=source.get("view_session_count"),
+        cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
+        item_osfid=source['item_osfid'],
+        item_type=source.get('item_type'),
+        provider_id=source.get('provider_id'),
+        platform_iri=source.get('platform_iri') or website_settings.DOMAIN,
+        view_count=source.get('view_count'),
+        view_session_count=source.get('view_session_count'),
         cumulative_view_count=_c_views,
         cumulative_view_session_count=_c_view_sess,
-        download_count=source.get("download_count"),
-        download_session_count=source.get("download_session_count"),
+        download_count=source.get('download_count'),
+        download_session_count=source.get('download_session_count'),
         cumulative_download_count=_c_downloads,
         cumulative_download_session_count=_c_download_sess,
     )
 
 
 def _get_cumulative_usage(osfid: str, until_when, item_type: str | None):
-    if item_type == "preprint":
+    if item_type == 'preprint':
         _views = _cumulative_preprint_count(PreprintView, osfid, until_when)
         _downloads = _cumulative_preprint_count(PreprintDownload, osfid, until_when)
         _view_sess, _download_sess = 0, 0  # no session info on preprints (yet)
@@ -407,90 +407,90 @@ def _get_cumulative_usage(osfid: str, until_when, item_type: str | None):
 
 
 def _cumulative_countedusage_views(osfid: str, until_when: str) -> tuple[int, int]:
-    """compute view_session_count separately to avoid double-counting
+    '''compute view_session_count separately to avoid double-counting
 
     (the same session may be represented in both the composite agg on `item_guid`
     and that on `surrounding_guids`)
-    """
+    '''
     # copied/adapted from osf.metrics.reporters.public_item_usage
     _search = (
         CountedUsageEs6.search()
-        .filter("term", item_public=True)
-        .filter("range", timestamp={"lt": until_when})
-        .filter("term", action_labels="view")
+        .filter('term', item_public=True)
+        .filter('range', timestamp={'lt': until_when})
+        .filter('term', action_labels='view')
         .filter(
-            "bool",
+            'bool',
             should=[
-                {"term": {"item_guid": osfid}},
-                {"term": {"surrounding_guids": osfid}},
+                {'term': {'item_guid': osfid}},
+                {'term': {'surrounding_guids': osfid}},
             ],
             minimum_should_match=1,
         )
         .extra(size=0)  # only aggregations, no hits
     )
     _search.aggs.metric(
-        "agg_session_count",
-        "cardinality",
-        field="session_id",
+        'agg_session_count',
+        'cardinality',
+        field='session_id',
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _view_count = _response.hits.total
     _view_session_count = (
         _response.aggregations.agg_session_count.value
-        if "agg_session_count" in _response.aggregations
+        if 'agg_session_count' in _response.aggregations
         else 0
     )
     return (_view_count, _view_session_count)
 
 
 def _cumulative_countedusage_downloads(osfid, until_when) -> tuple[int, int]:
-    """aggregate downloads on each osfid (not including components/files)"""
+    '''aggregate downloads on each osfid (not including components/files)'''
     # copied/adapted from osf.metrics.reporters.public_item_usage
     _search = (
         CountedUsageEs6.search()
-        .filter("term", item_public=True)
-        .filter("range", timestamp={"lt": until_when})
-        .filter("term", action_labels="download")
-        .filter("term", item_guid=osfid)
+        .filter('term', item_public=True)
+        .filter('range', timestamp={'lt': until_when})
+        .filter('term', action_labels='download')
+        .filter('term', item_guid=osfid)
     )
     _search.aggs.metric(
-        "agg_session_count",
-        "cardinality",
-        field="session_id",
+        'agg_session_count',
+        'cardinality',
+        field='session_id',
         precision_threshold=_MAX_CARDINALITY_PRECISION,
     )
     _response = _search.execute()
     _download_count = _response.hits.total
     _download_session_count = (
         _response.aggregations.agg_session_count.value
-        if "agg_session_count" in _response.aggregations
+        if 'agg_session_count' in _response.aggregations
         else 0
     )
     return (_download_count, _download_session_count)
 
 
 def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str) -> int:
-    """aggregate views on each preprint"""
+    '''aggregate views on each preprint'''
     # copied/adapted from osf.metrics.preprint_metrics
     _search = (
         preprint_metric_cls.search()
-        .filter("term", preprint_id=osfid)
-        .filter("range", timestamp={"lt": until_when})
+        .filter('term', preprint_id=osfid)
+        .filter('range', timestamp={'lt': until_when})
         .extra(size=0)  # no hits; only aggs
     )
-    _search.aggs.metric("agg_count", "sum", field="count")
+    _search.aggs.metric('agg_count', 'sum', field='count')
     _response = _search.execute()
     _view_count = (
         int(_response.aggregations.agg_count.value)
-        if hasattr(_response.aggregations, "agg_count")
+        if hasattr(_response.aggregations, 'agg_count')
         else 0
     )
     return _view_count
 
 
 def _iri_from_osfid(osfid: str) -> str:
-    return f"{website_settings.DOMAIN}{osfid}"
+    return f'{website_settings.DOMAIN}{osfid}'
 
 
 @functools.lru_cache
@@ -499,34 +499,34 @@ def _convert_database_iri(provider_id: str | None, item_type: str) -> str:
         return website_settings.DOMAIN  # osf is a provider, sure why not
 
     def _fallback_iri():
-        return f"urn:osf.io:{provider_id}"
+        return f'urn:osf.io:{provider_id}'
 
     match item_type:  # lower-cased osf.models class names
-        case "node" | "osfuser":
-            # implicit "osf" provider
+        case 'node' | 'osfuser':
+            # implicit 'osf' provider
             return website_settings.DOMAIN
-        case "preprint":
+        case 'preprint':
             try:
                 _provider = osfdb.PreprintProvider.objects.get(_id=provider_id)
             except osfdb.PreprintProvider.DoesNotExist:
-                _logger.error(f"unknown preprint provider {provider_id!r}")
+                _logger.error(f'unknown preprint provider {provider_id!r}')
                 return _fallback_iri()
             else:
                 return _provider.get_semantic_iri()
-        case "registration":
+        case 'registration':
             try:
                 _provider = osfdb.RegistrationProvider.objects.get(_id=provider_id)
             except osfdb.RegistrationProvider.DoesNotExist:
-                _logger.error(f"unknown registration provider {provider_id!r}")
+                _logger.error(f'unknown registration provider {provider_id!r}')
                 return _fallback_iri()
             else:
                 return _provider.get_semantic_iri()
-        case _ if "file" in item_type:
+        case _ if 'file' in item_type:
             # file providers are a different thing that don't really have an iri, just an id
-            return f"urn:files.osf.io:{provider_id}"
+            return f'urn:files.osf.io:{provider_id}'
         case _:  # give up gracefully
             _logger.error(
-                f"unknown item type {item_type!r} with provider {provider_id!r}"
+                f'unknown item type {item_type!r} with provider {provider_id!r}'
             )
             return _fallback_iri()
 
@@ -534,16 +534,16 @@ def _fallback_iri():
 def _each_usage_report_osfid(until_when, after_osfid=None):
     _search = (
         es6_reports.PublicItemUsageReport.search()
-        .filter("range", timestamp={"lt": until_when})
+        .filter('range', timestamp={'lt': until_when})
         .extra(size=0)
     )
     _search.aggs.bucket(
-        "agg_osfid",
-        "composite",
-        sources=[{"osfid": {"terms": {"field": "item_osfid"}}}],
+        'agg_osfid',
+        'composite',
+        sources=[{'osfid': {'terms': {'field': 'item_osfid'}}}],
         size=500,
     )
-    return _iter_composite_bucket_keys(_search, "agg_osfid", "osfid", after=after_osfid)
+    return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid)
 
 
 ###
@@ -553,32 +553,32 @@ def _each_usage_report_osfid(until_when, after_osfid=None):
 class Command(BaseCommand):
     def add_arguments(self, parser):
         parser.add_argument(
-            "--no-setup",
-            action="store_true",
+            '--no-setup',
+            action='store_true',
         )
         parser.add_argument(
-            "--no-counts",
-            action="store_true",
+            '--no-counts',
+            action='store_true',
         )
         parser.add_argument(
-            "--clear-state",
-            action="store_true",
+            '--clear-state',
+            action='store_true',
         )
         parser.add_argument(
-            "--start",
-            action="store_true",
+            '--start',
+            action='store_true',
         )
         parser.add_argument(
-            "--unchanged",
-            action="store_true",
+            '--unchanged',
+            action='store_true',
         )
         parser.add_argument(
-            "--usage-events",
-            action="store_true",
+            '--usage-events',
+            action='store_true',
         )
         parser.add_argument(
-            "--usage-reports",
-            action="store_true",
+            '--usage-reports',
+            action='store_true',
         )
 
     @functools.cached_property
@@ -599,7 +599,7 @@ def handle(
     ):
         self._quiet_chatty_loggers()
         if not no_setup:
-            call_command("djelme_backend_setup")
+            call_command('djelme_backend_setup')
         if clear_state:
             self._clear_state()
         self._check_started_at(start_now=start)
@@ -611,7 +611,7 @@ def handle(
         if usage_reports or _default_all:
             self._handle_usage_reports(start=start, no_counts=no_counts)
         if not no_counts:
-            self.stdout.write("(counts may be approximate)")
+            self.stdout.write('(counts may be approximate)')
 
     def _handle_unchanged(self, *, start: bool, no_counts: bool):
         # for each (unchanged) report/event:
@@ -621,16 +621,16 @@ def _handle_unchanged(self, *, start: bool, no_counts: bool):
                 # display counts
                 _es6_count = _es6_cls.search().count()
                 _es8_count = _es8_cls.search().count()
-                self._write_tabbed("es6", _es6_cls, _es6_count)
+                self._write_tabbed('es6', _es6_cls, _es6_count)
                 self._write_tabbed(
-                    "es8",
+                    'es8',
                     _es8_cls,
                     _es8_count,
                     style=self._eq_style(_es8_count, _es6_count),
                 )
             if start:  # schedule task
                 self.stdout.write(
-                    f"starting {_es6_cls.__name__} => {_es8_cls.__name__}"
+                    f'starting {_es6_cls.__name__} => {_es8_cls.__name__}'
                 )
                 migrate_unchanged_recordtype.delay(
                     _es6_cls.__name__, self._migration_started_at.isoformat()
@@ -644,10 +644,10 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
         if not no_counts:
             # display counts for each view/download event type
             _range_q = {
-                "range": {
-                    "timestamp": {
-                        "gte": _range_start.isoformat(),
-                        "lt": _range_end.isoformat(),
+                'range': {
+                    'timestamp': {
+                        'gte': _range_start.isoformat(),
+                        'lt': _range_end.isoformat(),
                     }
                 }
             }
@@ -658,21 +658,21 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
                 _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
             )
             _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
-            self._write_tabbed("es6", PreprintView, _es6_pview_count)
-            self._write_tabbed("es6", PreprintDownload, _es6_pdownload_count)
-            self._write_tabbed("es6", CountedUsageEs6, _es6_usage_event_count)
+            self._write_tabbed('es6', PreprintView, _es6_pview_count)
+            self._write_tabbed('es6', PreprintDownload, _es6_pdownload_count)
+            self._write_tabbed('es6', CountedUsageEs6, _es6_usage_event_count)
             self._write_tabbed(
-                "es6", f"(total between {_range_start} and {_range_end})", _es6_count
+                'es6', f'(total between {_range_start} and {_range_end})', _es6_count
             )
             self._write_tabbed(
-                "es8",
+                'es8',
                 es8_metrics.OsfCountedUsageRecord,
                 _es8_count,
                 style=self._eq_style(_es8_count, _es6_count),
             )
         if start:  # schedule (per-day?) tasks (if --start)
             self.stdout.write(
-                f"starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}"
+                f'starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}'
             )
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
@@ -686,23 +686,23 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
             # display counts of reports and distinct items
             _es6_count, _es6_item_count = _es6_usage_report_counts()
             _es8_count, _es8_item_count = _es8_usage_report_counts()
-            self._write_tabbed("es6", es6_reports.PublicItemUsageReport, _es6_count)
+            self._write_tabbed('es6', es6_reports.PublicItemUsageReport, _es6_count)
             self._write_tabbed(
-                "es8",
+                'es8',
                 es8_metrics.PublicItemUsageReportEs8,
                 _es8_count,
                 style=self._eq_style(_es8_count, _es6_count),
             )
             self._write_tabbed(
-                "es6",
+                'es6',
                 es6_reports.PublicItemUsageReport,
-                "osfid count:",
+                'osfid count:',
                 _es6_item_count,
             )
             self._write_tabbed(
-                "es8",
+                'es8',
                 es8_metrics.PublicItemUsageReportEs8,
-                "(items)",
+                '(items)',
                 _es8_item_count,
                 style=self._eq_style(_es8_item_count, _es6_item_count),
             )
@@ -710,7 +710,7 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
         # each item-task iter thru reports oldest to newest, adding cumulative counts
         if start:
             self.stdout.write(
-                f"starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.PublicItemUsageReportEs8.__name__}"
+                f'starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.PublicItemUsageReportEs8.__name__}'
             )
             for _osfid in _each_usage_report_osfid(
                 until_when=self._migration_started_at
@@ -723,24 +723,24 @@ def _check_started_at(self, start_now):
         _started_at = self._migration_started_at
         if _started_at:
             self.stdout.write(
-                f"osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}"
+                f'osf.metrics 6->8 migration started previously, at {_started_at.isoformat()}'
             )
         elif start_now:
             _started_at = es8_metrics.Elastic6To8State.set_started_at_now()
             del self._migration_started_at  # clear cache
             self.stdout.write(
-                f"osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}"
+                f'osf.metrics 6->8 migration starting now, at {_started_at.isoformat()}'
             )
         else:
             self.stdout.write(
-                "osf.metrics 6->8 migration not started nor starting (run with `--start` to start)"
+                'osf.metrics 6->8 migration not started nor starting (run with `--start` to start)'
             )
 
     def _clear_state(self):
         self.stdout.write(
-            "clearing all migration state (start time, etc)", self.style.NOTICE
+            'clearing all migration state (start time, etc)', self.style.NOTICE
         )
-        es8_metrics.Elastic6To8State.search().query({"match_all": {}}).delete()
+        es8_metrics.Elastic6To8State.search().query({'match_all': {}}).delete()
         es8_metrics.Elastic6To8State.refresh()
 
     def _eq_style(self, num: int, should_be: int):
@@ -752,13 +752,13 @@ def _to_str(strable):
                 return strable.__name__
             return str(strable)
 
-        self.stdout.write("\t".join(map(_to_str, strables)), style)
+        self.stdout.write('\t'.join(map(_to_str, strables)), style)
 
     def _quiet_chatty_loggers(self):
         _chatty_loggers = [
-            "elasticsearch",
-            "elastic_transport",
-            "elasticsearch_metrics",
+            'elasticsearch',
+            'elastic_transport',
+            'elasticsearch_metrics',
         ]
         for logger_name in _chatty_loggers:
             logging.getLogger(logger_name).setLevel(logging.ERROR)

From 999dc869cd5bf0cd558f8cb2f0795e2a504e3427 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 13:43:29 -0400
Subject: [PATCH 056/100] fix: background migration task module

---
 website/settings/defaults.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index 2d174472576..69f82d2d2a7 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -489,7 +489,7 @@ class CeleryConfig:
     }
 
     background_migration_modules = {
-        'osf.management.commands.metrics_es8_migration',
+        'osf.management.commands.migrate_osfmetrics_6to8',
     }
 
     try:

From d9f5380aa7a1556a535b136e564f7c8e61d1fdc3 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 15:08:18 -0400
Subject: [PATCH 057/100] fix: timestamp tz handling

---
 osf/metrics/es8_metrics.py            | 2 +-
 osf_tests/metrics/test_es8_metrics.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 2f4023105d8..3b83103b197 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -133,7 +133,7 @@ def _get_unique_together_values(self):
             self.timestamp.year,
             self.timestamp.month,
             self.timestamp.day,
-            tzinfo=datetime.UTC,
+            tzinfo=self.timestamp.tzinfo,
         )
         time_in_seconds = (self.timestamp - day_start).total_seconds()
         time_window = int(time_in_seconds / 30)  # 30-second windows
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index e93579628dc..e9dd140b60a 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -1,4 +1,4 @@
-from datetime import datetime
+import datetime
 
 from elasticsearch_metrics.tests.util import djelme_test_backends
 import pytest
@@ -20,7 +20,7 @@ def _real_elastic(self):
 
     def test_nested_pageview_autofill(self):
         usage = OsfCountedUsageRecord.record(
-            timestamp=datetime(2024, 1, 1, 15, 0),
+            timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
             sessionhour_id='blah',
             database_iri='https://osf.example/provider',
             item_iri='https://osf.example/itemm',

From beb85485f6f06df8abdd98a703c3b31e139e0d98 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 15:27:03 -0400
Subject: [PATCH 058/100] fix: tests with djelme

---
 osf_tests/metrics/test_es8_metrics.py | 41 ++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index e9dd140b60a..ce562a026b4 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -39,13 +39,52 @@ def test_nested_pageview_autofill(self):
         assert usage.pageview_info.page_path == '/path/test'
         assert usage.pageview_info.referer_domain == 'google.com'
         assert usage.pageview_info.hour_of_day == 15
+        assert usage.item_iri in usage.within_iris
+
+    def test_nested_pageview_autofill_dict(self):
+        usage = OsfCountedUsageRecord.record(
+            timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
+            sessionhour_id='blah',
+            database_iri='https://osf.example/provider',
+            item_iri='https://osf.example/itemm',
+            item_osfid='itemm',
+            item_public=True,
+            item_type='https://osf.example/Preprint',
+            platform_iri='https://osf.example',
+            user_is_authenticated=False,
+            pageview_info={
+                'page_url': 'https://example.com/path/test',
+                'referer_url': 'https://google.com',
+                'route_name': 'foo.bar',
+                'page_title': 'title title',
+            },
+        )
+        assert usage.pageview_info.page_path == '/path/test'
+        assert usage.pageview_info.referer_domain == 'google.com'
+        assert usage.pageview_info.hour_of_day == 15
+        assert usage.item_iri in usage.within_iris
+
+    def test_none_pageview_nested_autofill(self):
+        usage = OsfCountedUsageRecord.record(
+            timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
+            sessionhour_id='blah',
+            database_iri='https://osf.example/provider',
+            item_iri='https://osf.example/itemm',
+            item_osfid='itemm',
+            item_public=True,
+            item_type='https://osf.example/Preprint',
+            platform_iri='https://osf.example',
+            user_is_authenticated=False,
+        )
+        assert usage.pageview_info is None
+        assert usage.item_iri in usage.within_iris
 
     def test_save_report(self):
         _saved = DownloadCountReportEs8.record(
             cycle_coverage='2026.1.1',
             daily_file_downloads=17,
         )
-        DownloadCountReportEs8.refresh_timeseries_indexes()
+        DownloadCountReportEs8.refresh()
         _response = DownloadCountReportEs8.search().execute()
         (_fetched,) = _response
         assert _fetched.meta.id == _saved.meta.id

From 778f4b435627a08a7d5f475a578c6be0d37e5cb2 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 15:48:40 -0400
Subject: [PATCH 059/100] fix: pageview_info optional

---
 osf/metrics/es8_metrics.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 3b83103b197..4c46710748c 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -57,19 +57,19 @@ class PageviewInfo(esdsl.InnerDoc):
     """
 
     # fields that should be provided
-    referer_url: str
-    page_url: str
-    page_title: str
-    route_name: str = esdsl.mapped_field(esdsl.Keyword(
+    referer_url: str | None
+    page_url: str | None
+    page_title: str | None
+    route_name: str | None = esdsl.mapped_field(esdsl.Keyword(
         fields={
             'by_prefix': esdsl.Text(analyzer=route_prefix_analyzer),
         },
     ))
 
     # fields auto-filled
-    page_path: str
-    referer_domain: str
-    hour_of_day: int
+    page_path: str | None
+    referer_domain: str | None
+    hour_of_day: int | None
 
 
 ###
@@ -111,7 +111,9 @@ def clean(self):
             if _ref_url:
                 self.pageview_info.referer_domain = urlsplit(_ref_url).netloc
         # ensure inclusive "within"
-        if self.item_iri not in self.within_iris:
+        if not self.within_iris:
+            self.within_iris = [self.item_iri]
+        elif self.item_iri not in self.within_iris:
             self.within_iris = [self.item_iri, *self.within_iris]
 
     def _get_unique_together_values(self):

From ee913841430543a12f89c16a1aba3e40bb1e280b Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 16:59:53 -0400
Subject: [PATCH 060/100] fix: tests

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 2 ++
 osf_tests/metrics/test_es8_metrics.py              | 2 +-
 poetry.lock                                        | 6 +++---
 pyproject.toml                                     | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index ccc15834644..04afa94b6b9 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -2,6 +2,7 @@
 import datetime
 import functools
 import logging
+import uuid
 
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
@@ -340,6 +341,7 @@ def _convert_preprint_metric(
         # fields used to compute a sessionhour_id:
         timestamp=source['timestamp'],
         user_id=source.get('user_id'),
+        client_session_id=str(uuid.uuid4()),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
         database_iri=_convert_database_iri(source.get('provider_id'), 'preprint'),
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index ce562a026b4..a871054e96b 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -76,7 +76,7 @@ def test_none_pageview_nested_autofill(self):
             platform_iri='https://osf.example',
             user_is_authenticated=False,
         )
-        assert usage.pageview_info is None
+        assert not usage.pageview_info
         assert usage.item_iri in usage.within_iris
 
     def test_save_report(self):
diff --git a/poetry.lock b/poetry.lock
index 1aec6afa426..4fcf24cabd1 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "34c7b180e6d595b3374534cd50efb00f5a809582"
-resolved_reference = "34c7b180e6d595b3374534cd50efb00f5a809582"
+reference = "222f03e92ec45a86f76db7a0461ae4fc483b2810"
+resolved_reference = "222f03e92ec45a86f76db7a0461ae4fc483b2810"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "9edb43576b960885c14e32e9ae74218c28d883df48679868848dbaa5780c4b12"
+content-hash = "e510408fd1590e2ec46f022a6004e55df2c813f6e8688d0c6d75308f1dccf43b"
diff --git a/pyproject.toml b/pyproject.toml
index 815efdd61a6..ade2030afdd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "34c7b180e6d595b3374534cd50efb00f5a809582"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "222f03e92ec45a86f76db7a0461ae4fc483b2810"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From a65d6a580159eabd5cf6fdae8ab89b5d9ade5cfb Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 17:28:49 -0400
Subject: [PATCH 061/100] fix: preprint metric conversion

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 04afa94b6b9..92b01e913c3 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -339,7 +339,7 @@ def _convert_preprint_metric(
     return es8_metrics.OsfCountedUsageRecord.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
-        timestamp=source['timestamp'],
+        timestamp=datetime.datetime.fromisoformat(source['timestamp']),
         user_id=source.get('user_id'),
         client_session_id=str(uuid.uuid4()),
         # fields from djelme.CountedUsageRecord:

From 2059a5657e8d60312da3b9a1a99d2fe129dfc5be Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 21 Apr 2026 17:28:58 -0400
Subject: [PATCH 062/100] fix: osf_shell

---
 osf/management/commands/osf_shell.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/osf/management/commands/osf_shell.py b/osf/management/commands/osf_shell.py
index 851895623ac..69443d004be 100644
--- a/osf/management/commands/osf_shell.py
+++ b/osf/management/commands/osf_shell.py
@@ -32,7 +32,7 @@ def get_user_imports():
 from django.db.models import Model
 from django_extensions.management.commands import shell_plus
 from django_extensions.management.utils import signalcommand
-from elasticsearch_metrics.registry import registry as metrics_registry
+from elasticsearch_metrics.registry import djelme_registry
 
 
 def header(text):
@@ -160,7 +160,7 @@ def get_osf_imports(self):
     def get_metrics(self):
         return {
             each.__name__: each
-            for each in metrics_registry.get_metrics()
+            for each in djelme_registry.each_recordtype()
         }
 
     def get_grouped_imports(self, options):

From c186373defd8b8bb732b1410fe320bb6c9553236 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 22 Apr 2026 15:58:00 -0400
Subject: [PATCH 063/100] per-deployment djelme index name prefix

---
 api/base/settings/defaults.py | 3 +++
 poetry.lock                   | 6 +++---
 pyproject.toml                | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index 72e169c25a1..8f3683b6115 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -325,6 +325,7 @@
     },
     'osfmetrics_es8': {
         'elasticsearch_metrics.imps.elastic8': {
+            # passthru kwargs to elasticsearch8 connection constructor
             'hosts': osf_settings.ELASTIC8_URI,
             'ca_certs': osf_settings.ELASTIC8_CERT_PATH,
             'basic_auth': (
@@ -332,6 +333,8 @@
                 if osf_settings.ELASTIC8_SECRET is not None
                 else None
             ),
+            # djelme-specific kwargs
+            'djelme_default_index_name_prefix': osf_settings.SHARE_PROVIDER_PREPEND,
         },
     },
 }
diff --git a/poetry.lock b/poetry.lock
index 4fcf24cabd1..c16b7d021e0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "222f03e92ec45a86f76db7a0461ae4fc483b2810"
-resolved_reference = "222f03e92ec45a86f76db7a0461ae4fc483b2810"
+reference = "4e833670178beb682bb0d64e4f33db012cf8f014"
+resolved_reference = "4e833670178beb682bb0d64e4f33db012cf8f014"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "e510408fd1590e2ec46f022a6004e55df2c813f6e8688d0c6d75308f1dccf43b"
+content-hash = "d08b71fd886f9c6bd3d8d6cb1eda9f08431b7e84398b107e25f0371a4111266b"
diff --git a/pyproject.toml b/pyproject.toml
index ade2030afdd..fcc0decc86d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "222f03e92ec45a86f76db7a0461ae4fc483b2810"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "4e833670178beb682bb0d64e4f33db012cf8f014"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From e161f5d9e2480a53022df20823300378e3ff7b01 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 23 Apr 2026 13:45:12 -0400
Subject: [PATCH 064/100] better counted-usage autofill (and item_type iris)

---
 .../commands/migrate_osfmetrics_6to8.py       | 93 ++++++++++---------
 osf/metadata/osf_gathering.py                 | 63 ++-----------
 osf/metadata/osfmap_utils.py                  | 65 +++++++++++++
 osf/metadata/serializers/linkset.py           |  4 +-
 osf/metrics/es8_metrics.py                    | 87 +++++++++++++++--
 5 files changed, 206 insertions(+), 106 deletions(-)
 create mode 100644 osf/metadata/osfmap_utils.py

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 92b01e913c3..b77d7b6af92 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -4,6 +4,7 @@
 import logging
 import uuid
 
+from django.apps import apps
 from django.core.management import call_command
 from django.core.management.base import BaseCommand
 from django.db import OperationalError as DjangoOperationalError
@@ -16,6 +17,8 @@
 from psycopg2 import OperationalError as PostgresOperationalError
 
 from framework.celery_tasks import app as celery_app
+from osf.metadata.rdfutils import OSF
+from osf.metadata.osfmap_utils import osfmap_type_from_model, osf_iri, is_osf_component
 from osf.metrics.preprint_metrics import (
     PreprintView,
     PreprintDownload,
@@ -131,6 +134,9 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
 @celery_app.task(**_TASK_KWARGS)
 def migrate_usage_reports(osfid: str, until_when: str):
     # from PublicItemUsageReport to PublicItemUsageReportEs8
+    _osfguid = osfdb.Guid.load(osfid)
+    _item_is_component = is_osf_component(_osfguid.referent) if _osfguid else False
+
     def _each_new():
         # go in sorted order to build cumulative counts
         # (only a few dozen of these per item; should be fine to sort and load all at once)
@@ -144,7 +150,9 @@ def _each_new():
         for _hit in list(_each_hit):
             yield (
                 _prior_report := _convert_public_usage_report(
-                    _hit['_source'], _prior_report
+                    _hit['_source'],
+                    _prior_report,
+                    item_is_component=_item_is_component,
                 )
             )
 
@@ -307,23 +315,25 @@ def _each_kwarg():
 
 
 def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
-    _item_iri = _iri_from_osfid(source['item_guid'])
     return es8_metrics.OsfCountedUsageRecord(
         # fields from djelme.CountedUsageRecord:
         timestamp=source['timestamp'],
         sessionhour_id=source['session_id'],
         platform_iri=source.get('platform_iri') or website_settings.DOMAIN,
         database_iri=_convert_database_iri(
-            source.get('provider_id'), source.get('item_type')
+            provider_id=source.get('provider_id'),
+            osf_model_name=source.get('item_type'),
         ),
-        item_iri=_item_iri,
         within_iris=[
-            _iri_from_osfid(_within_osfid)
+            osf_iri(_within_osfid)
             for _within_osfid in source.get('surrounding_guids', ())
         ],
         # fields from OsfCountedUsageRecord:
         item_osfid=source['item_guid'],
-        item_type=source.get('item_type', 'osf:Object'),
+        item_type=_convert_item_type(
+            source.get('item_type'),
+            has_surrounding_items=bool(source.get('surrounding_guids')),
+        ),
         item_public=source.get('item_public'),
         provider_id=source.get('provider_id'),
         user_is_authenticated=source.get('user_is_authenticated'),
@@ -335,7 +345,6 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
 def _convert_preprint_metric(
     source: dict, action_labels: list[str]
 ) -> es8_metrics.OsfCountedUsageRecord:
-    _preprint_iri = _iri_from_osfid(source['preprint_id'])
     return es8_metrics.OsfCountedUsageRecord.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
@@ -344,12 +353,13 @@ def _convert_preprint_metric(
         client_session_id=str(uuid.uuid4()),
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
-        database_iri=_convert_database_iri(source.get('provider_id'), 'preprint'),
-        item_iri=_preprint_iri,
-        within_iris=[_preprint_iri],
+        database_iri=_convert_database_iri(
+            provider_id=source.get('provider_id'),
+            osf_model_name='preprint',
+        ),
         # fields from OsfCountedUsageRecord:
         item_osfid=source['preprint_id'],
-        item_type='preprint',
+        item_type=OSF.Preprint,
         item_public=True,
         provider_id=source.get('provider_id'),
         user_is_authenticated=bool(source.get('user_id')),
@@ -360,12 +370,13 @@ def _convert_preprint_metric(
 def _convert_public_usage_report(
     source: dict,
     prior_report: es8_metrics.PublicItemUsageReportEs8 | None,
+    item_is_component: bool,
 ) -> es8_metrics.PublicItemUsageReportEs8:
     if prior_report is None:
         _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
             osfid=source['item_osfid'],
             until_when=YearMonth.from_str(source['report_yearmonth']).month_end(),
-            item_type=source.get('item_type'),
+            is_preprint=(source.get('item_type') == 'preprint'),
         )
     else:
         _c_views = prior_report.cumulative_view_count + source.get('view_count', 0)
@@ -381,7 +392,10 @@ def _convert_public_usage_report(
     return es8_metrics.PublicItemUsageReportEs8(
         cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
         item_osfid=source['item_osfid'],
-        item_type=source.get('item_type'),
+        item_type=_convert_item_type(
+            source.get('item_type'),
+            has_surrounding_items=item_is_component,
+        ),
         provider_id=source.get('provider_id'),
         platform_iri=source.get('platform_iri') or website_settings.DOMAIN,
         view_count=source.get('view_count'),
@@ -395,8 +409,8 @@ def _convert_public_usage_report(
     )
 
 
-def _get_cumulative_usage(osfid: str, until_when, item_type: str | None):
-    if item_type == 'preprint':
+def _get_cumulative_usage(osfid: str, until_when, *, is_preprint: bool):
+    if is_preprint:
         _views = _cumulative_preprint_count(PreprintView, osfid, until_when)
         _downloads = _cumulative_preprint_count(PreprintDownload, osfid, until_when)
         _view_sess, _download_sess = 0, 0  # no session info on preprints (yet)
@@ -491,46 +505,37 @@ def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str)
     return _view_count
 
 
-def _iri_from_osfid(osfid: str) -> str:
-    return f'{website_settings.DOMAIN}{osfid}'
+def _convert_item_type(osf_model_name: str | None, has_surrounding_items: bool):
+    if osf_model_name:
+        try:
+            return osfmap_type_from_model(
+                apps.get_model('osf', osf_model_name),
+                is_component=has_surrounding_items,
+            )
+        except LookupError:
+            pass
+    return OSF.Object  # fine, fallback to abstract type
 
 
-@functools.lru_cache
-def _convert_database_iri(provider_id: str | None, item_type: str) -> str:
+def _convert_database_iri(provider_id: str | None, osf_model_name: str) -> str:
     if not provider_id:
         return website_settings.DOMAIN  # osf is a provider, sure why not
 
-    def _fallback_iri():
-        return f'urn:osf.io:{provider_id}'
-
-    match item_type:  # lower-cased osf.models class names
-        case 'node' | 'osfuser':
-            # implicit 'osf' provider
+    match osf_model_name:  # lower-cased osf.models class names
+        case 'node' | 'osfuser':  # implicit untyped 'osf' provider
             return website_settings.DOMAIN
-        case 'preprint':
-            try:
-                _provider = osfdb.PreprintProvider.objects.get(_id=provider_id)
-            except osfdb.PreprintProvider.DoesNotExist:
-                _logger.error(f'unknown preprint provider {provider_id!r}')
-                return _fallback_iri()
-            else:
-                return _provider.get_semantic_iri()
-        case 'registration':
-            try:
-                _provider = osfdb.RegistrationProvider.objects.get(_id=provider_id)
-            except osfdb.RegistrationProvider.DoesNotExist:
-                _logger.error(f'unknown registration provider {provider_id!r}')
-                return _fallback_iri()
-            else:
-                return _provider.get_semantic_iri()
-        case _ if 'file' in item_type:
+        case 'preprint':  # match PreprintProvider.get_semantic_iri
+            return f'{website_settings.DOMAIN}preprints/{provider_id}'
+        case 'registration':  # match RegistrationProvider.get_semantic_iri
+            return f'{website_settings.DOMAIN}registries/{provider_id}'
+        case _ if 'file' in osf_model_name:
             # file providers are a different thing that don't really have an iri, just an id
             return f'urn:files.osf.io:{provider_id}'
         case _:  # give up gracefully
             _logger.error(
-                f'unknown item type {item_type!r} with provider {provider_id!r}'
+                f'unknown model {osf_model_name!r} with provider {provider_id!r}'
             )
-            return _fallback_iri()
+            return f'urn:osf.io:{provider_id}'
 
 
 def _each_usage_report_osfid(until_when, after_osfid=None):
diff --git a/osf/metadata/osf_gathering.py b/osf/metadata/osf_gathering.py
index dfa74612bd0..118151627d3 100644
--- a/osf/metadata/osf_gathering.py
+++ b/osf/metadata/osf_gathering.py
@@ -13,6 +13,12 @@
 from osf import models as osfdb
 from osf.metadata import gather
 from osf.metadata.definitions.datacite import DATACITE_RESOURCE_TYPES_GENERAL
+from osf.metadata.osfmap_utils import (
+    osfmap_type,
+    osf_iri,
+    is_osf_component,
+    osfid_from_iri,
+)
 from osf.metadata.rdfutils import (
     DATACITE,
     DCAT,
@@ -30,7 +36,6 @@
     SKOS,
     checksum_iri,
     format_dcterms_extent,
-    without_namespace,
     smells_like_iri,
 )
 from osf.metrics.reports import PublicItemUsageReport
@@ -319,15 +324,13 @@ def get_expiration_date(self, basket: gather.Basket) -> datetime.date | None:
 ##### END osfmap #####
 
 
-##### BEGIN osf-specific utils #####
-
 class OsfFocus(gather.Focus):
     def __init__(self, osf_item):
         if isinstance(osf_item, str):
             osf_item = osfdb.base.coerce_guid(osf_item).referent
         super().__init__(
             iri=osf_iri(osf_item),
-            rdftype=get_rdf_type(osf_item),
+            rdftype=osfmap_type(osf_item),
             provider_id=osf_item.provider._id if (osf_item and getattr(osf_item, 'type', '') == 'osf.registration' and osf_item.provider) else None
         )
         self.dbmodel = osf_item
@@ -337,54 +340,6 @@ def __init__(self, osf_item):
             pass  # is ok for a focus to be something non-osfguidy
 
 
-def is_root(osf_node):
-    return (osf_node.root_id == osf_node.id)
-
-
-def get_rdf_type(osfguid_referent):
-    if isinstance(osfguid_referent, osfdb.Guid):
-        osfguid_referent = osfguid_referent.referent
-
-    if isinstance(osfguid_referent, osfdb.OSFUser):
-        return DCTERMS.Agent
-    if isinstance(osfguid_referent, osfdb.BaseFileNode):
-        return OSF.File
-    if isinstance(osfguid_referent, osfdb.Preprint):
-        return OSF.Preprint
-    if isinstance(osfguid_referent, osfdb.Registration):
-        return (
-            OSF.Registration
-            if is_root(osfguid_referent)
-            else OSF.RegistrationComponent
-        )
-    if isinstance(osfguid_referent, osfdb.Node):
-        return (
-            OSF.Project
-            if is_root(osfguid_referent)
-            else OSF.ProjectComponent
-        )
-    raise NotImplementedError
-
-
-def osf_iri(guid_or_model):
-    """return a rdflib.URIRef or None
-
-    @param guid_or_model: a string, Guid instance, or another osf model instance
-    @returns rdflib.URIRef or None
-    """
-    guid = osfdb.base.coerce_guid(guid_or_model)
-    return OSFIO[guid._id]
-
-
-def osfguid_from_iri(iri: str) -> str:
-    if iri.startswith(OSFIO):
-        return without_namespace(iri, OSFIO)
-    raise ValueError(f'expected iri starting with "{OSFIO}" (got "{iri}")')
-
-
-##### END osf-specific utils #####
-
-
 ##### BEGIN the gatherers #####
 #
 
@@ -718,7 +673,7 @@ def gather_file_mediatype(focus):
 @gather.er(DCTERMS.hasPart, DCTERMS.isPartOf)
 def gather_parts(focus):
     if isinstance(focus.dbmodel, osfdb.AbstractNode):
-        if not is_root(focus.dbmodel) and focus.dbmodel.root.is_public:
+        if is_osf_component(focus.dbmodel) and focus.dbmodel.root.is_public:
             root_focus = OsfFocus(focus.dbmodel.root)
             yield (OSF.hasRoot, root_focus)
         child_relations = (
@@ -1130,7 +1085,7 @@ def gather_cedar_templates(focus):
 @gather.er(OSF.usage)
 def gather_last_month_usage(focus):
     _usage_report = PublicItemUsageReport.for_last_month(
-        item_osfid=osfguid_from_iri(focus.iri),
+        item_osfid=osfid_from_iri(focus.iri),
     )
     if _usage_report is not None:
         _usage_report_ref = rdflib.BNode()
diff --git a/osf/metadata/osfmap_utils.py b/osf/metadata/osfmap_utils.py
new file mode 100644
index 00000000000..e3e9ab89a9c
--- /dev/null
+++ b/osf/metadata/osfmap_utils.py
@@ -0,0 +1,65 @@
+from osf.metadata.rdfutils import (
+    DCTERMS,
+    OSF,
+    OSFIO,
+    without_namespace,
+)
+from osf import models as osfdb
+
+
+def is_osf_component(osf_node) -> bool:
+    return (
+        isinstance(osf_node, osfdb.AbstractNode)  # anything else (including None) is never a component
+        and osf_node.root_id != osf_node.id  # a component is a node that is not its own root
+    )
+
+
+def osfmap_type_from_model(model_cls, *, is_component=None):
+    if issubclass(model_cls, osfdb.OSFUser):
+        return DCTERMS.Agent
+    if issubclass(model_cls, osfdb.BaseFileNode):
+        return OSF.File
+    if issubclass(model_cls, osfdb.Preprint):
+        return OSF.Preprint
+    if issubclass(model_cls, osfdb.Registration):
+        if is_component is None:
+            raise ValueError(f'osfmap_type_from_model requires `is_component` for {model_cls}')
+        return (
+            OSF.RegistrationComponent
+            if is_component
+            else OSF.Registration
+        )
+    if issubclass(model_cls, osfdb.Node):
+        if is_component is None:
+            raise ValueError(f'osfmap_type_from_model requires `is_component` for {model_cls}')
+        return (
+            OSF.ProjectComponent
+            if is_component
+            else OSF.Project
+        )
+    raise LookupError(model_cls)
+
+
+def osfmap_type(osf_obj):
+    if isinstance(osf_obj, osfdb.Guid):
+        osf_obj = osf_obj.referent  # accept a Guid in place of the object it refers to
+    return osfmap_type_from_model(type(osf_obj), is_component=is_osf_component(osf_obj))  # LookupError for unsupported types
+
+
+def osf_iri(guid_or_model):
+    """return a rdflib.URIRef or None
+
+    @param guid_or_model: a string, Guid instance, or another osf model instance
+    @returns rdflib.URIRef or None
+    """
+    guid = osfdb.base.coerce_guid(guid_or_model)
+    return OSFIO[guid._id]
+
+
+def osfid_from_iri(iri: str) -> str:
+    if not iri.startswith(OSFIO):
+        raise ValueError(f'expected iri starting with "{OSFIO}" (got {iri!r})')
+    _osfid = without_namespace(iri, OSFIO)
+    if not _osfid or '/' in _osfid:
+        raise ValueError(f'expected iri path with exactly one segment (got {_osfid!r} from {iri!r})')
+    return _osfid
diff --git a/osf/metadata/serializers/linkset.py b/osf/metadata/serializers/linkset.py
index f83dad00ebd..3ee907d0532 100644
--- a/osf/metadata/serializers/linkset.py
+++ b/osf/metadata/serializers/linkset.py
@@ -16,7 +16,7 @@
 import rdflib
 
 from ._base import MetadataSerializer
-from osf.metadata.osf_gathering import osfguid_from_iri
+from osf.metadata.osf_gathering import osfid_from_iri
 from osf.metadata.rdfutils import (DOI, DATACITE, DCTERMS, OWL, RDF, OSF, DCAT, SCHEMA, DATACITE_SCHEMA_RESOURCE_TYPE_GENERAL_MAPPING, map_resource_type_general_datacite_to_scheme)
 from website.settings import DOMAIN
 from website.util import web_url_for
@@ -74,7 +74,7 @@ def _each_link(self) -> Iterator[SignpostLink]:
 
         base_metadata_url = urljoin(DOMAIN, web_url_for(
             'metadata_download',  # name of a view function mapped in website/routes.py
-            guid=osfguid_from_iri(self.basket.focus.iri),
+            guid=osfid_from_iri(self.basket.focus.iri),
         ))
         split_base_metadata_url = urlsplit(base_metadata_url)
 
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 4c46710748c..3bc573865c3 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -1,12 +1,21 @@
 import datetime
 import enum
+import functools
 from urllib.parse import urlsplit
 
 import elasticsearch8.dsl as esdsl
 from elasticsearch_metrics import DAILY, MONTHLY, YEARLY
 import elasticsearch_metrics.imps.elastic8 as djelme
 
+from osf.metadata.osfmap_utils import (
+    osfmap_type,
+    osf_iri,
+    osfid_from_iri,
+)
+from osf.metrics.counted_usage import _get_surrounding_guids
 from osf.metrics.utils import YearMonth
+from osf import models as osfdb
+from website import settings as website_settings
 
 
 ###
@@ -99,9 +108,69 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     action_labels: list[str]
     pageview_info: PageviewInfo | None
 
+    @functools.cached_property
+    def _osfid_referent(self):
+        # for use by autofill methods, if needed
+        return getattr(osfdb.Guid.load(self.item_osfid), 'referent', None)  # unwrap Guid to its referent; None when there is none
+
     def clean(self):
         super().clean()
-        # autofill pageview_info fields
+        self._autofill_item_iri_and_osfid()
+        self._autofill_item_public()
+        self._autofill_item_type()
+        self._autofill_provider_id()
+        self._autofill_within_iris()
+        self._autofill_pageview()
+        self._autofill_database_iri()
+
+    def _autofill_item_iri_and_osfid(self):
+        if self.item_osfid and not self.item_iri:
+            self.item_iri = osf_iri(self.item_osfid)
+        elif self.item_iri and not self.item_osfid:
+            try:
+                self.item_osfid = osfid_from_iri(self.item_iri)
+            except ValueError:
+                pass
+
+    def _autofill_item_public(self):
+        if self.item_osfid and (self.item_public is None):
+            _item = self._osfid_referent
+            # if it quacks like BaseFileNode, look at .target instead
+            _item = getattr(_item, 'target', None) or _item
+            self.item_public = (
+                _item.verified_publishable               # quacks like Preprint
+                if hasattr(_item, 'verified_publishable')
+                else getattr(_item, 'is_public', False)  # quacks like AbstractNode
+            )
+
+    def _autofill_item_type(self):
+        if self.item_osfid and not self.item_type and self._osfid_referent:  # no referent: leave unset (osfmap_type would raise LookupError)
+            self.item_type = osfmap_type(self._osfid_referent)
+
+    def _autofill_provider_id(self):
+        if self.item_osfid and not self.provider_id:
+            _provider = getattr(self._osfid_referent, 'provider', None)
+            if _provider is None:
+                self.provider_id = 'osf'          # quacks like Node, Comment, WikiPage
+            elif isinstance(_provider, str):
+                self.provider_id = _provider      # quacks like BaseFileNode
+            else:
+                self.provider_id = _provider._id  # quacks like Registration, Preprint, Collection
+
+    def _autofill_within_iris(self):
+        if self.item_osfid and (self.within_iris is None) and self._osfid_referent:
+            self.within_iris = [
+                osf_iri(_osfid)
+                for _osfid in _get_surrounding_guids(self._osfid_referent)
+            ]
+        # ensure inclusive "within"
+        if not self.within_iris:
+            self.within_iris = [self.item_iri]
+        if self.item_iri not in self.within_iris:
+            self.within_iris = [self.item_iri, *self.within_iris]
+
+    def _autofill_pageview(self):
+        # autofill pageview_info fields from other fields
         if self.pageview_info:
             self.pageview_info.hour_of_day = self.timestamp.hour
             _url = self.pageview_info.page_url
@@ -110,11 +179,17 @@ def clean(self):
             _ref_url = self.pageview_info.referer_url
             if _ref_url:
                 self.pageview_info.referer_domain = urlsplit(_ref_url).netloc
-        # ensure inclusive "within"
-        if not self.within_iris:
-            self.within_iris = [self.item_iri]
-        elif self.item_iri not in self.within_iris:
-            self.within_iris = [self.item_iri, *self.within_iris]
+
+    def _autofill_database_iri(self):
+        if self.item_osfid and not self.database_iri:
+            _provider = getattr(self._osfid_referent, 'provider', None)
+            if not _provider:
+                self.database_iri = website_settings.DOMAIN
+            elif isinstance(_provider, str):
+                # file providers are a different thing that don't really have an iri, just an id
+                self.database_iri = f'urn:files.osf.io:{self.provider_id}'
+            else:
+                self.database_iri = _provider.get_semantic_iri()
 
     def _get_unique_together_values(self):
         """get "unique together" values for "ON CONFLICT UPDATE" behavior

From 45d1e30ccaed64b7a274c0d6748af65feb088e1a Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 24 Apr 2026 09:32:18 -0400
Subject: [PATCH 065/100] osf-admin migrate_osfmetrics_6to8

---
 admin/management/urls.py                 |  5 +++--
 admin/management/views.py                | 20 ++++++++++++++++++++
 admin/templates/management/commands.html | 23 +++++++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/admin/management/urls.py b/admin/management/urls.py
index d583deb2ce0..79c5be0a7a2 100644
--- a/admin/management/urls.py
+++ b/admin/management/urls.py
@@ -1,4 +1,4 @@
-from django.urls import re_path
+from django.urls import re_path, path
 
 from admin.management import views
 
@@ -19,5 +19,6 @@
     re_path(r'^empty_metadata_dataarchive_registration_bulk_resync', views.EmptyMetadataDataarchiveRegistrationBulkResync.as_view(),
             name='empty-metadata-dataarchive-registration-bulk-resync'),
     re_path(r'^sync_notification_templates', views.SyncNotificationTemplates.as_view(),
-            name='sync_notification_templates')
+            name='sync_notification_templates'),
+    path('migrate_osfmetrics_6to8', views.MigrateOsfmetrics6to8.as_view(), name='migrate_osfmetrics_6to8'),
 ]
diff --git a/admin/management/views.py b/admin/management/views.py
index 36f3d893f24..c390d08e629 100644
--- a/admin/management/views.py
+++ b/admin/management/views.py
@@ -1,9 +1,12 @@
+from io import StringIO
+
 from dateutil.parser import isoparse
 from django.views.generic import TemplateView, View
 from django.contrib import messages
 from django.http import HttpResponse
 from django.utils import timezone
 from django.contrib.auth.mixins import PermissionRequiredMixin
+from django.core.management import call_command, CommandError
 
 from osf.management.commands.manage_switch_flags import manage_waffle
 from osf.management.commands.update_registration_schemas import update_registration_schemas
@@ -181,3 +184,20 @@ def post(self, request):
         populate_notification_types()
         messages.success(request, 'Notification templates have been successfully synced.')
         return redirect(reverse('management:commands'))
+
+
+class MigrateOsfmetrics6to8(ManagementCommandPermissionView):
+    def post(self, request):
+        _command_kwargs = {
+            'no_setup': True,
+            'no_counts': request.POST.get('no_counts'),
+            'clear_state': request.POST.get('clear_state'),
+            'start': request.POST.get('start'),
+            'unchanged': request.POST.get('unchanged'),
+            'usage_reports': request.POST.get('usage_reports'),
+            'usage_events': request.POST.get('usage_events'),
+        }
+        _out_io = StringIO()
+        call_command('migrate_osfmetrics_6to8', **_command_kwargs, stdout=_out_io)
+        messages.info(request, _out_io.getvalue())
+        return redirect(reverse('management:commands'))
diff --git a/admin/templates/management/commands.html b/admin/templates/management/commands.html
index dd90affd5ff..ae4ea406b00 100644
--- a/admin/templates/management/commands.html
+++ b/admin/templates/management/commands.html
@@ -165,6 +165,29 @@ <h4><u>Sync Notification Templates</u></h4>
                     </nav>
                 </form>
             </section>
+            <section>
+                <h4><u>migrate osf-metrics 6to8</u></h4>
+                <p>
+                    view progress of the osf-metrics migration from elastic6 to elastic8 (or start it)
+                </p>
+                <form method="post"
+                      action="{% url 'management:migrate_osfmetrics_6to8'%}"
+                      style="display: flex; flex-direction: column;">
+                    {% csrf_token %}
+                    <label><input type="checkbox" name="no_counts"> no counts</label>
+                    <label><input type="checkbox" name="clear_state"> reset migration start time (caution)</label>
+                    <label><input type="checkbox" name="start"> start tasks (caution)</label>
+                    <fieldset>
+                        default all if unselected:
+                        <label><input type="checkbox" name="unchanged"> unchanged events and reports</label>
+                        <label><input type="checkbox" name="usage_events"> usage events</label>
+                        <label><input type="checkbox" name="usage_reports"> usage reports</label>
+                    </fieldset>
+                    <nav>
+                        <input class="btn btn-success" type="submit" value="Run" />
+                    </nav>
+                </form>
+            </section>
         </div>
     </section>
 {% endblock %}

From 2537561f7d682c5273ef7bb33df864cc93056db2 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 24 Apr 2026 08:44:17 -0400
Subject: [PATCH 066/100] /_/metrics/raw-es8_metrics/...

---
 api/metrics/urls.py                   |  4 +++-
 api/metrics/views.py                  | 32 +++++++++++++++++----------
 api_tests/metrics/test_raw_metrics.py | 12 +++++-----
 poetry.lock                           |  6 ++---
 pyproject.toml                        |  2 +-
 5 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/api/metrics/urls.py b/api/metrics/urls.py
index e135212541c..db63df3dd4c 100644
--- a/api/metrics/urls.py
+++ b/api/metrics/urls.py
@@ -5,7 +5,9 @@
 app_name = 'osf'
 
 urlpatterns = [
-    re_path(r'^raw/(?P<url_path>[a-z0-9._/]*)$', views.RawMetricsView.as_view(), name=views.RawMetricsView.view_name),
+    re_path(r'^raw/(?P<url_path>[a-z0-9._/]*)$', views.RawMetricsView.as_view(), name=views.RawMetricsView.view_name, kwargs={'djelme_backend_name': 'osfmetrics_es6'}),
+    path('raw-<djelme_backend_name>/', views.RawMetricsView.as_view(), name=views.RawMetricsView.view_name, kwargs={'url_path': ''}),
+    path('raw-<djelme_backend_name>/<path:url_path>', views.RawMetricsView.as_view(), name=views.RawMetricsView.view_name),
     re_path(r'^preprints/views/$', views.PreprintViewMetrics.as_view(), name=views.PreprintViewMetrics.view_name),
     re_path(r'^preprints/downloads/$', views.PreprintDownloadMetrics.as_view(), name=views.PreprintDownloadMetrics.view_name),
     re_path(r'^registries_moderation/transitions/$', views.RegistriesModerationMetricsView.as_view(), name=views.RegistriesModerationMetricsView.view_name),
diff --git a/api/metrics/views.py b/api/metrics/views.py
index c6e4d56c9b9..69c44027ec9 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -8,6 +8,7 @@
 
 from elasticsearch6.exceptions import NotFoundError, RequestError
 from elasticsearch6_dsl.connections import get_connection
+from elasticsearch_metrics.registry import djelme_registry
 
 from framework.auth.oauth_scopes import CoreScopes
 
@@ -225,24 +226,31 @@ def delete(self, request, *args, **kwargs):
         raise ValidationError('DELETE not supported. Use GET/POST/PUT')
 
     @require_switch(ENABLE_RAW_METRICS)
-    def get(self, request, *args, **kwargs):
-        connection = get_connection()
-        url_path = kwargs['url_path']
-        return JsonResponse(connection.transport.perform_request('GET', f'/{url_path}'))
+    def get(self, request, *args, djelme_backend_name, url_path, **kwargs):
+        connection = self._get_es_connection(djelme_backend_name)
+        _response = connection.transport.perform_request('GET', f'/{url_path}')
+        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
 
     @require_switch(ENABLE_RAW_METRICS)
-    def post(self, request, *args, **kwargs):
-        connection = get_connection()
-        url_path = kwargs['url_path']
+    def post(self, request, *args, djelme_backend_name, url_path, **kwargs):
+        connection = self._get_es_connection(djelme_backend_name)
         body = json.loads(request.body)
-        return JsonResponse(connection.transport.perform_request('POST', f'/{url_path}', body=body))
+        _response = connection.transport.perform_request('POST', f'/{url_path}', body=body)
+        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
 
     @require_switch(ENABLE_RAW_METRICS)
-    def put(self, request, *args, **kwargs):
-        connection = get_connection()
-        url_path = kwargs['url_path']
+    def put(self, request, *args, djelme_backend_name, url_path, **kwargs):
+        connection = self._get_es_connection(djelme_backend_name)
         body = json.loads(request.body)
-        return JsonResponse(connection.transport.perform_request('PUT', f'/{url_path}', body=body))
+        _response = connection.transport.perform_request('PUT', f'/{url_path}', body=body)
+        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
+
+    def _get_es_connection(self, djelme_backend_name):
+        try:
+            _backend = djelme_registry.get_backend(djelme_backend_name)
+        except LookupError:
+            raise Http404  # backend name comes from the url; an unknown name is a 404
+        return _backend.elastic_client
 
 
 class RegistriesModerationMetricsView(GenericAPIView):
diff --git a/api_tests/metrics/test_raw_metrics.py b/api_tests/metrics/test_raw_metrics.py
index 6a3b9b8f8c5..e32936d9024 100644
--- a/api_tests/metrics/test_raw_metrics.py
+++ b/api_tests/metrics/test_raw_metrics.py
@@ -1,10 +1,10 @@
 import pytest
-import time
 
 from website.app import setup_django
 setup_django()
 
 from waffle.testutils import override_switch
+from elasticsearch6_dsl.connections import connections as es6_connections
 
 from osf import features
 from osf_tests.factories import AuthUserFactory
@@ -40,9 +40,9 @@ def user(self):
     def other_user(self):
         return AuthUserFactory()
 
-    @pytest.fixture
-    def base_url(self):
-        return f'/{API_BASE}metrics/raw/'
+    @pytest.fixture(params=['raw', 'raw-osfmetrics_es6'])
+    def base_url(self, request):
+        return f'/{API_BASE}metrics/{request.param}/'
 
     def test_delete(self, app, user, base_url):
         res = app.delete_json_api(base_url, auth=user.auth, expect_errors=True)
@@ -136,7 +136,9 @@ def test_post_and_get(self, app, user, base_url):
         res = app.post_json_api(post_url, post_data, auth=user.auth)
         assert res.json == post_return
 
-        time.sleep(3)
+        es6_connections.get_connection('osfmetrics_es6').indices.refresh(
+            index='customer',
+        )
 
         get_url = f'{base_url}customer/_search?q=*'
         res = app.get(get_url, auth=user.auth)
diff --git a/poetry.lock b/poetry.lock
index c16b7d021e0..37f0a9a8292 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"]
 [package.source]
 type = "git"
 url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git"
-reference = "4e833670178beb682bb0d64e4f33db012cf8f014"
-resolved_reference = "4e833670178beb682bb0d64e4f33db012cf8f014"
+reference = "f2b92e5509389bb6c33f5a90c9ca4fe4e68187e2"
+resolved_reference = "f2b92e5509389bb6c33f5a90c9ca4fe4e68187e2"
 
 [[package]]
 name = "django-extensions"
@@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.12"
-content-hash = "d08b71fd886f9c6bd3d8d6cb1eda9f08431b7e84398b107e25f0371a4111266b"
+content-hash = "fe2cf66c0cc6f72e6d6191ac07a2e1ca874324afc19e2d073a51ce69422e75e5"
diff --git a/pyproject.toml b/pyproject.toml
index fcc0decc86d..9cdd094dde1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,7 +91,7 @@ datacite = "1.1.3"
 rdflib = "7.0.0"
 colorlog = "6.8.2"
 # Metrics
-django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "4e833670178beb682bb0d64e4f33db012cf8f014"}
+django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "f2b92e5509389bb6c33f5a90c9ca4fe4e68187e2"}
 # Impact Metrics CSV Export
 djangorestframework-csv = "3.0.2"
 gevent = "24.2.1"

From 4084a367f2cd8e2fa88d0ed46caab2556fc776e0 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 24 Apr 2026 12:38:21 -0400
Subject: [PATCH 067/100] better 6to8 error handling

---
 admin/management/views.py                     |  6 +++--
 admin/templates/management/commands.html      |  5 ++--
 .../commands/migrate_osfmetrics_6to8.py       | 25 ++++++++++---------
 osf/metadata/osfmap_utils.py                  |  8 ++++--
 4 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/admin/management/views.py b/admin/management/views.py
index c390d08e629..cdde3dfa7a3 100644
--- a/admin/management/views.py
+++ b/admin/management/views.py
@@ -6,7 +6,7 @@
 from django.http import HttpResponse
 from django.utils import timezone
 from django.contrib.auth.mixins import PermissionRequiredMixin
-from django.core.management import call_command, CommandError
+from django.core.management import call_command
 
 from osf.management.commands.manage_switch_flags import manage_waffle
 from osf.management.commands.update_registration_schemas import update_registration_schemas
@@ -190,6 +190,7 @@ class MigrateOsfmetrics6to8(ManagementCommandPermissionView):
     def post(self, request):
         _command_kwargs = {
             'no_setup': True,
+            'no_color': True,
             'no_counts': request.POST.get('no_counts'),
             'clear_state': request.POST.get('clear_state'),
             'start': request.POST.get('start'),
@@ -199,5 +200,6 @@ def post(self, request):
         }
         _out_io = StringIO()
         call_command('migrate_osfmetrics_6to8', **_command_kwargs, stdout=_out_io)
-        messages.info(request, _out_io.getvalue())
+        for _line in _out_io.getvalue().split('\n'):
+            messages.info(request, _line)
         return redirect(reverse('management:commands'))
diff --git a/admin/templates/management/commands.html b/admin/templates/management/commands.html
index ae4ea406b00..ceed7ac77e1 100644
--- a/admin/templates/management/commands.html
+++ b/admin/templates/management/commands.html
@@ -174,14 +174,15 @@ <h4><u>migrate osf-metrics 6to8</u></h4>
                       action="{% url 'management:migrate_osfmetrics_6to8'%}"
                       style="display: flex; flex-direction: column;">
                     {% csrf_token %}
+                    <label><input type="checkbox" name="start"> start tasks (caution)</label>
                     <label><input type="checkbox" name="no_counts"> no counts</label>
                     <label><input type="checkbox" name="clear_state"> reset migration start time (caution)</label>
-                    <label><input type="checkbox" name="start"> start tasks (caution)</label>
                     <fieldset>
-                        default all if unselected:
+                        (narrow types:
                         <label><input type="checkbox" name="unchanged"> unchanged events and reports</label>
                         <label><input type="checkbox" name="usage_events"> usage events</label>
                         <label><input type="checkbox" name="usage_reports"> usage reports</label>
+                        )
                     </fieldset>
                     <nav>
                         <input class="btn btn-success" type="submit" value="Run" />
diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index b77d7b6af92..392ebb330b0 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -11,7 +11,8 @@
 from elasticsearch6.exceptions import ConnectionError as Elastic6ConnectionError
 from elasticsearch6 import helpers as es6_helpers
 from elasticsearch6_dsl.connections import connections as es6_connections
-from elasticsearch8.exceptions import ConnectionError as Elastic8ConnectionError
+from elasticsearch8.exceptions import TransportError as Elastic8TransportError
+from elasticsearch8.helpers import BulkIndexError as Elastic8BulkIndexError
 from elasticsearch_metrics.registry import djelme_registry
 from elasticsearch_metrics.imps import elastic8 as djel8me
 from psycopg2 import OperationalError as PostgresOperationalError
@@ -63,7 +64,7 @@
     autoretry_for=(
         DjangoOperationalError,
         Elastic6ConnectionError,
-        Elastic8ConnectionError,
+        Elastic8TransportError,
         PostgresOperationalError,
     ),
     retry_backoff=True,  # exponential backoff, with jitter
@@ -87,7 +88,7 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
         _es8_recordtype(**_convert_kwargs(_hit['_source']))
         for _hit in _es6_scan_range(_es6_recordtype, until_when=until_when)
     )
-    return _es8_bulk_save(_es8_recordtype, _each_new)
+    _es8_bulk_save(_es8_recordtype, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -102,7 +103,7 @@ def migrate_counted_usages(from_when: str, until_when: str):
             addl_filter={'exists': {'field': 'item_guid'}},
         )
     )
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -115,7 +116,7 @@ def migrate_preprint_views(from_when: str, until_when: str):
             PreprintView, from_when=from_when, until_when=until_when
         )
     )
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -128,7 +129,7 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
             PreprintDownload, from_when=from_when, until_when=until_when
         )
     )
-    return _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
@@ -156,7 +157,7 @@ def _each_new():
                 )
             )
 
-    return _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new())
+    _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new())
 
 
 ###
@@ -168,11 +169,11 @@ def _es6_connection():
 
 
 def _es8_bulk_save(es8_recordtype, each_new_record):
-    _success_count, _fail_count = es8_recordtype.bulk(
-        each_new_record,
-        stats_only=True,
-    )
-    return _success_count
+    try:
+        es8_recordtype.bulk(each_new_record, stats_only=True)
+    except Elastic8BulkIndexError as _bulk_error:
+        # so actual errors show in celery task result
+        raise Exception(_bulk_error.errors) from _bulk_error
 
 
 def _date_range(
diff --git a/osf/metadata/osfmap_utils.py b/osf/metadata/osfmap_utils.py
index e3e9ab89a9c..031cd160eac 100644
--- a/osf/metadata/osfmap_utils.py
+++ b/osf/metadata/osfmap_utils.py
@@ -52,8 +52,12 @@ def osf_iri(guid_or_model):
     @param guid_or_model: a string, Guid instance, or another osf model instance
     @returns rdflib.URIRef or None
     """
-    guid = osfdb.base.coerce_guid(guid_or_model)
-    return OSFIO[guid._id]
+    _osfid: str = (
+        guid_or_model
+        if isinstance(guid_or_model, str)
+        else osfdb.base.coerce_guid(guid_or_model)._id
+    )
+    return OSFIO[_osfid]
 
 
 def osfid_from_iri(iri: str) -> str:

From ce857047560878c2dab7be1daa51e38c90687e09 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Fri, 24 Apr 2026 12:38:43 -0400
Subject: [PATCH 068/100] fewer osfmetrics indexes

---
 osf/metrics/es8_metrics.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 3bc573865c3..7118ab3cc28 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -108,6 +108,9 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     action_labels: list[str]
     pageview_info: PageviewInfo | None
 
+    class Meta:
+        timeseries_index_timedepth = MONTHLY
+
     @functools.cached_property
     def _osfid_referent(self):
         # for use by autofill methods, if needed
@@ -241,15 +244,9 @@ class RegistriesModerationMetricsEs8(djelme.EventRecord):
     user_id: str
     comment: str | None
 
-    class Index:
-        settings = {
-            'number_of_shards': 1,
-            'number_of_replicas': 1,
-            'refresh_interval': '1s',
-        }
-
     class Meta:
         timeseries_recordtype_name = 'RegistriesModerationMetrics'
+        timeseries_index_timedepth = MONTHLY
 
 
 ###
@@ -341,7 +338,7 @@ class InstitutionSummaryReportEs8(djelme.CyclicRecord):
     registered_projects: RegistrationRunningTotals
 
     class Meta:
-        timeseries_index_timedepth = MONTHLY
+        timeseries_index_timedepth = YEARLY
         timeseries_recordtype_name = 'InstitutionSummaryReport'
 
 
@@ -353,7 +350,7 @@ class NewUserDomainReportEs8(djelme.CyclicRecord):
     new_user_count: int
 
     class Meta:
-        timeseries_index_timedepth = MONTHLY
+        timeseries_index_timedepth = YEARLY
         timeseries_recordtype_name = 'NewUserDomainReport'
 
 
@@ -388,7 +385,7 @@ class PreprintSummaryReportEs8(djelme.CyclicRecord):
     preprint_count: int
 
     class Meta:
-        timeseries_index_timedepth = MONTHLY
+        timeseries_index_timedepth = YEARLY
         timeseries_recordtype_name = 'PreprintSummaryReport'
 
 
@@ -450,7 +447,7 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
     storage_byte_count: int = esdsl.mapped_field(esdsl.Long())
 
     class Meta:
-        timeseries_index_timedepth = MONTHLY
+        timeseries_index_timedepth = YEARLY
         timeseries_recordtype_name = 'InstitutionalUserReport'
 
 
@@ -500,7 +497,7 @@ class PublicItemUsageReportEs8(djelme.CyclicRecord):
     cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long())
 
     class Meta:
-        timeseries_index_timedepth = MONTHLY
+        timeseries_index_timedepth = YEARLY
         timeseries_recordtype_name = 'PublicItemUsageReport'
 
 

From c858e7b45d0f49fc2cf57745fe1feae01fbfba2b Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Wed, 22 Apr 2026 17:47:58 +0300
Subject: [PATCH 069/100] add es8 reports

---
 addons/base/views.py                          | 19 ++++
 api/metrics/serializers.py                    | 12 +++
 .../commands/monthly_reporters_go.py          |  4 +-
 osf/metrics/reporters/download_count.py       | 19 ++--
 osf/metrics/reporters/institution_summary.py  | 77 +++++++++++++---
 .../reporters/institution_summary_monthly.py  | 23 ++++-
 osf/metrics/reporters/institutional_users.py  | 29 +++++-
 osf/metrics/reporters/new_user_domain.py      | 19 ++--
 osf/metrics/reporters/node_count.py           | 75 ++++++++++++++--
 .../reporters/osfstorage_file_count.py        | 29 ++++--
 osf/metrics/reporters/preprint_count.py       | 18 ++--
 osf/metrics/reporters/private_spam_metrics.py | 23 ++++-
 osf/metrics/reporters/public_item_usage.py    | 52 ++++++++---
 osf/metrics/reporters/spam_count.py           | 27 +++++-
 osf/metrics/reporters/storage_addon_usage.py  | 88 ++++++++++++++-----
 osf/metrics/reporters/user_count.py           | 21 ++++-
 16 files changed, 444 insertions(+), 91 deletions(-)

diff --git a/addons/base/views.py b/addons/base/views.py
index ebcd662966b..4352eb65f30 100644
--- a/addons/base/views.py
+++ b/addons/base/views.py
@@ -34,6 +34,7 @@
 from framework.flask import redirect
 from framework.sentry import log_exception
 from framework.transactions.handlers import no_auto_transaction
+from osf.metrics.es8_metrics import OsfCountedUsageRecord
 from website import settings
 from addons.base import signals as file_signals
 from addons.base.utils import format_last_known_metadata, get_mfr_url
@@ -691,6 +692,15 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
                 version=fileversion.identifier,
                 path=file_node.path,
             )
+            OsfCountedUsageRecord.record(
+                count=1,
+                preprint_id=resource._id,
+                user_id=getattr(auth.user, '_id', None),
+                provider_id=resource.provider._id,
+                database_iri=resource.get_semantic_iri(),
+                version=fileversion.identifier,
+                path=file_node.path,
+            )
         except es_exceptions.ConnectionError:
             log_exception()
 
@@ -718,6 +728,15 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
                 version=fileversion.identifier,
                 path=file_node.path,
             )
+            OsfCountedUsageRecord.record(
+                count=1,
+                preprint_id=resource._id,
+                user_id=getattr(auth.user, '_id', None),
+                provider_id=resource.provider._id,
+                database_iri=resource.get_semantic_iri(),
+                version=fileversion.identifier,
+                path=file_node.path,
+            )
         except es_exceptions.ConnectionError:
             log_exception()
 
diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 5bbde293505..93c0c2b69bc 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -6,6 +6,8 @@
 from api.base.serializers import BaseAPISerializer
 from api.base.utils import absolute_reverse
 from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
+from osf.metrics.es8_metrics import OsfCountedUsageRecord
+from osf.models import Guid
 from website import settings as website_settings
 
 logger = logging.getLogger(__name__)
@@ -66,6 +68,16 @@ def create(self, validated_data):
         pageview_info = None
         if pageview_info_data := validated_data.get('pageview_info'):
             pageview_info = PageviewInfo(**pageview_info_data)
+        OsfCountedUsageRecord.record(
+            platform_iri=website_settings.DOMAIN,
+            provider_id=validated_data.get('provider_id'),
+            item_osfid=validated_data.get('item_guid'),
+            sessionhour_id=validated_data['session_id'],
+            user_is_authenticated=validated_data['user_is_authenticated'],
+            action_labels=validated_data.get('action_labels'),
+            pageview_info=pageview_info,
+        )
+
         return CountedAuthUsage.record(
             platform_iri=website_settings.DOMAIN,
             provider_id=validated_data.get('provider_id'),
diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py
index 218b45da1df..6e2b1c9bc72 100644
--- a/osf/management/commands/monthly_reporters_go.py
+++ b/osf/management/commands/monthly_reporters_go.py
@@ -85,8 +85,8 @@ def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict):
         framework.sentry.log_exception(exc)
         return
 
-    _report = _reporter.report(**report_kwargs)
-    if _report is not None:
+    _reports = _reporter.report(**report_kwargs)
+    for _report in (_reports or ()):  # tolerate None, as the replaced `is not None` check did
         _report.report_yearmonth = _reporter.yearmonth
         _report.save()
         _followup_task = _reporter.followup_task(_report)
diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py
index f772722dc31..d6f6cea84af 100644
--- a/osf/metrics/reporters/download_count.py
+++ b/osf/metrics/reporters/download_count.py
@@ -1,14 +1,21 @@
 from osf.models import PageCounter
 from osf.metrics.reports import DownloadCountReport
 from ._base import DailyReporter
+from osf.metrics.es8_metrics import DownloadCountReportEs8
 
 
 class DownloadCountReporter(DailyReporter):
     def report(self, date):
         download_count = int(PageCounter.get_all_downloads_on_date(date) or 0)
-        return [
-            DownloadCountReport(
-                daily_file_downloads=download_count,
-                report_date=date,
-            ),
-        ]
+        reports = []  # holds the Es8 record and the DownloadCountReport built from it
+        report_es8 = DownloadCountReportEs8(
+            cycle_coverage=f"{date:%Y.%m.%d}",
+            daily_file_downloads=download_count,
+        )
+        reports.append(report_es8)
+        report = DownloadCountReport(
+            daily_file_downloads=report_es8.daily_file_downloads,
+            report_date=date,
+        )
+        reports.append(report)
+        return reports
diff --git a/osf/metrics/reporters/institution_summary.py b/osf/metrics/reporters/institution_summary.py
index 892e337aec4..d691dd8b803 100644
--- a/osf/metrics/reporters/institution_summary.py
+++ b/osf/metrics/reporters/institution_summary.py
@@ -10,7 +10,12 @@
 )
 from osf.models import Institution
 from ._base import DailyReporter
-
+from osf.metrics.es8_metrics import (
+    InstitutionSummaryReportEs8,
+    RunningTotal as RunningTotalEs8,
+    NodeRunningTotals as NodeRunningTotalsEs8,
+    RegistrationRunningTotals as RegistrationRunningTotalsEs8
+)
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -39,16 +44,15 @@ def report(self, date):
                 created__date__lte=date,
                 type='osf.registration',
             )
-
-            report = InstitutionSummaryReport(
-                report_date=date,
+            report_es8 = InstitutionSummaryReportEs8(
+                cycle_coverage=f"{date:%Y.%m.%d}",
                 institution_id=institution._id,
                 institution_name=institution.name,
-                users=RunningTotal(
+                users=RunningTotalEs8(
                     total=institution.get_institution_users().filter(is_active=True).count(),
                     total_daily=institution.get_institution_users().filter(date_confirmed__date=date).count(),
                 ),
-                nodes=NodeRunningTotals(
+                nodes=NodeRunningTotalsEs8(
                     total=node_qs.count(),
                     public=node_qs.filter(public_query).count(),
                     private=node_qs.filter(private_query).count(),
@@ -58,7 +62,7 @@ def report(self, date):
                     private_daily=node_qs.filter(private_query & daily_query).count(),
                 ),
                 # Projects use get_roots to remove children
-                projects=NodeRunningTotals(
+                projects=NodeRunningTotalsEs8(
                     total=node_qs.get_roots().count(),
                     public=node_qs.filter(public_query).get_roots().count(),
                     private=node_qs.filter(private_query).get_roots().count(),
@@ -67,7 +71,7 @@ def report(self, date):
                     public_daily=node_qs.filter(public_query & daily_query).get_roots().count(),
                     private_daily=node_qs.filter(private_query & daily_query).get_roots().count(),
                 ),
-                registered_nodes=RegistrationRunningTotals(
+                registered_nodes=RegistrationRunningTotalsEs8(
                     total=registration_qs.count(),
                     public=registration_qs.filter(public_query).count(),
                     embargoed=registration_qs.filter(private_query).count(),
@@ -78,7 +82,7 @@ def report(self, date):
                     embargoed_daily=registration_qs.filter(private_query & daily_query).count(),
                     embargoed_v2_daily=registration_qs.filter(private_query & daily_query & embargo_v2_query).count(),
                 ),
-                registered_projects=RegistrationRunningTotals(
+                registered_projects=RegistrationRunningTotalsEs8(
                     total=registration_qs.get_roots().count(),
                     public=registration_qs.filter(public_query).get_roots().count(),
                     embargoed=registration_qs.filter(private_query).get_roots().count(),
@@ -87,7 +91,60 @@ def report(self, date):
                     total_daily=registration_qs.filter(daily_query).get_roots().count(),
                     public_daily=registration_qs.filter(public_query & daily_query).get_roots().count(),
                     embargoed_daily=registration_qs.filter(private_query & daily_query).get_roots().count(),
-                    embargoed_v2_daily=registration_qs.filter(private_query & daily_query & embargo_v2_query).get_roots().count(),
+                    embargoed_v2_daily=registration_qs.filter(
+                        private_query & daily_query & embargo_v2_query).get_roots().count(),
+                ),
+            )
+            reports.append(report_es8)
+
+            report = InstitutionSummaryReport(
+                report_date=date,
+                institution_id=institution._id,
+                institution_name=institution.name,
+                users=RunningTotal(
+                    total=report_es8.users.total,
+                    total_daily=report_es8.users.total_daily,
+                ),
+                nodes=NodeRunningTotals(
+                    total=report_es8.nodes.total,
+                    public=report_es8.nodes.public,
+                    private=report_es8.nodes.private,
+
+                    total_daily=report_es8.nodes.total_daily,
+                    public_daily=report_es8.nodes.public_daily,
+                    private_daily=report_es8.nodes.private_daily,
+                ),
+                # Projects use get_roots to remove children
+                projects=NodeRunningTotals(
+                    total=report_es8.projects.total,
+                    public=report_es8.projects.public,
+                    private=report_es8.projects.private,
+
+                    total_daily=report_es8.projects.total_daily,
+                    public_daily=report_es8.projects.public_daily,
+                    private_daily=report_es8.projects.private_daily,
+                ),
+                registered_nodes=RegistrationRunningTotals(
+                    total=report_es8.registered_nodes.total,
+                    public=report_es8.registered_nodes.public,
+                    embargoed=report_es8.registered_nodes.embargoed,
+                    embargoed_v2=report_es8.registered_nodes.embargoed_v2,
+
+                    total_daily=report_es8.registered_nodes.total_daily,
+                    public_daily=report_es8.registered_nodes.public_daily,
+                    embargoed_daily=report_es8.registered_nodes.embargoed_daily,
+                    embargoed_v2_daily=report_es8.registered_nodes.embargoed_v2_daily,
+                ),
+                registered_projects=RegistrationRunningTotals(
+                    total=report_es8.registered_projects.total,
+                    public=report_es8.registered_projects.public,
+                    embargoed=report_es8.registered_projects.embargoed,
+                    embargoed_v2=report_es8.registered_projects.embargoed_v2,
+
+                    total_daily=report_es8.registered_projects.total_daily,
+                    public_daily=report_es8.registered_projects.public_daily,
+                    embargoed_daily=report_es8.registered_projects.embargoed_daily,
+                    embargoed_v2_daily=report_es8.registered_projects.embargoed_v2_daily,
                 ),
             )
 
diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index 4748860db32..bf029aeff12 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -6,6 +6,7 @@
 from addons.osfstorage.models import OsfStorageFile
 from osf.metrics.reports import InstitutionMonthlySummaryReport
 from ._base import MonthlyReporter
+from osf.metrics.es8_metrics import InstitutionMonthlySummaryReportEs8
 
 
 class InstitutionalSummaryMonthlyReporter(MonthlyReporter):
@@ -31,8 +32,9 @@ def generate_report(self, institution):
         )
 
         preprint_queryset = self.get_published_preprints(institution, self.yearmonth)
-
-        return InstitutionMonthlySummaryReport(
+        reports = []
+        report_es8 = InstitutionMonthlySummaryReportEs8(
+            cycle_coverage=f"{self.yearmonth:%Y.%m.%d}",
             institution_id=institution._id,
             user_count=institution.get_institution_users().count(),
             private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False),
@@ -45,6 +47,23 @@ def generate_report(self, institution):
             monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, self.yearmonth),
             monthly_active_user_count=self.get_monthly_active_user_count(institution, self.yearmonth),
         )
+        reports.append(report_es8)
+
+        report = InstitutionMonthlySummaryReport(
+            institution_id=report_es8.institution_id,
+            user_count=report_es8.user_count,
+            private_project_count=report_es8.private_project_count,
+            public_project_count=report_es8.public_project_count,
+            public_registration_count=report_es8.public_registration_count,
+            embargoed_registration_count=report_es8.embargoed_registration_count,
+            published_preprint_count=report_es8.published_preprint_count,
+            storage_byte_count=report_es8.storage_byte_count,
+            public_file_count=report_es8.public_file_count,
+            monthly_logged_in_user_count=report_es8.monthly_logged_in_user_count,
+            monthly_active_user_count=report_es8.monthly_active_user_count,
+        )
+        reports.append(report)
+        return reports
 
     def _get_count(self, node_queryset, node_type, is_public):
         return node_queryset.filter(type=node_type, is_public=is_public, root_id=F('pk')).count()
diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py
index 512472a3d96..1a1289657b3 100644
--- a/osf/metrics/reporters/institutional_users.py
+++ b/osf/metrics/reporters/institutional_users.py
@@ -1,4 +1,5 @@
 import dataclasses
+from typing import List
 
 from django.contrib.contenttypes.models import ContentType
 from django.db.models import Q, F, Sum
@@ -9,6 +10,7 @@
 from osf.metrics.reports import InstitutionalUserReport
 from osf.metrics.utils import YearMonth
 from ._base import MonthlyReporter
+from osf.metrics.es8_metrics import InstitutionalUserReportEs8
 
 
 class InstitutionalUsersReporter(MonthlyReporter):
@@ -38,7 +40,7 @@ def report(self, **report_kwargs):
         _institution = osfdb.Institution.objects.get(pk=report_kwargs['institution_pk'])
         _user = osfdb.OSFUser.objects.get(pk=report_kwargs['user_pk'])
         _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth)
-        return _helper.report
+        return _helper.reports
 
 
 # helper
@@ -47,11 +49,13 @@ class _InstiUserReportHelper:
     institution: osfdb.Institution
     user: osfdb.OSFUser
     yearmonth: YearMonth
-    report: InstitutionalUserReport = dataclasses.field(init=False)
+    reports: List[InstitutionalUserReport | InstitutionalUserReportEs8] = dataclasses.field(init=False)
 
     def __post_init__(self):
         _affiliation = self.user.get_institution_affiliation(self.institution._id)
-        self.report = InstitutionalUserReport(
+        self.reports = []
+        report_es8 = InstitutionalUserReportEs8(
+            cycle_coverage=f"{self.yearmonth:%Y.%m.%d}",
             institution_id=self.institution._id,
             user_id=self.user._id,
             user_name=self.user.fullname,
@@ -72,6 +76,25 @@ def __post_init__(self):
             published_preprint_count=self._published_preprint_queryset().count(),
             storage_byte_count=self._storage_byte_count(),
         )
+        self.reports.append(report_es8)
+        report = InstitutionalUserReport(
+            institution_id=report_es8.institution_id,
+            user_id=report_es8.user_id,
+            user_name=report_es8.user_name,
+            department_name=report_es8.department_name,
+            month_last_login=report_es8.month_last_login,
+            month_last_active=report_es8.month_last_active,
+            account_creation_date=report_es8.account_creation_date,
+            orcid_id=report_es8.orcid_id,
+            public_project_count=report_es8.public_project_count,
+            private_project_count=report_es8.private_project_count,
+            public_registration_count=report_es8.public_registration_count,
+            embargoed_registration_count=report_es8.embargoed_registration_count,
+            public_file_count=report_es8.public_file_count,
+            published_preprint_count=report_es8.published_preprint_count,
+            storage_byte_count=report_es8.storage_byte_count,
+        )
+        self.reports.append(report)
 
     @property
     def before_datetime(self):
diff --git a/osf/metrics/reporters/new_user_domain.py b/osf/metrics/reporters/new_user_domain.py
index ec13aad860f..d6bdec2aa6e 100644
--- a/osf/metrics/reporters/new_user_domain.py
+++ b/osf/metrics/reporters/new_user_domain.py
@@ -4,6 +4,7 @@
 from osf.models import OSFUser
 from osf.metrics.reports import NewUserDomainReport
 from ._base import DailyReporter
+from osf.metrics.es8_metrics import NewUserDomainReportEs8
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -20,11 +21,19 @@ def report(self, date):
             email.split('@')[-1]
             for email in new_user_emails
         )
-        return [
-            NewUserDomainReport(
-                report_date=date,
+        reports = []
+        for domain_name, count in domain_names.items():
+            report_es8 = NewUserDomainReportEs8(
+                cycle_coverage=f"{date:%Y.%m.%d}",
                 domain_name=domain_name,
                 new_user_count=count,
             )
-            for domain_name, count in domain_names.items()
-        ]
+            reports.append(report_es8)
+
+            report = NewUserDomainReport(
+                report_date=date,
+                domain_name=report_es8.domain_name,
+                new_user_count=report_es8.new_user_count,
+            )
+            reports.append(report)
+        return reports
diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py
index 0a4120ca1f9..bb4dcbb165a 100644
--- a/osf/metrics/reporters/node_count.py
+++ b/osf/metrics/reporters/node_count.py
@@ -8,7 +8,11 @@
     RegistrationRunningTotals,
 )
 from ._base import DailyReporter
-
+from osf.metrics.es8_metrics import (
+    NodeSummaryReportEs8,
+    NodeRunningTotals as NodeRunningTotalsEs8,
+    RegistrationRunningTotals as RegistrationRunningTotalsEs8,
+)
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -35,11 +39,11 @@ def report(self, date):
         embargo_v2_query = Q(root__embargo__end_date__date__gt=date)
 
         exclude_spam = ~Q(spam_status__in=[SpamStatus.SPAM, SpamStatus.FLAGGED])
-
-        report = NodeSummaryReport(
-            report_date=date,
+        reports = []
+        report_es8 = NodeSummaryReportEs8(
+            cycle_coverage=f"{date:%Y.%m.%d}",
             # Nodes - the number of projects and components
-            nodes=NodeRunningTotals(
+            nodes=NodeRunningTotalsEs8(
                 total=node_qs.count(),
                 total_excluding_spam=node_qs.filter(exclude_spam).count(),
                 public=node_qs.filter(public_query).count(),
@@ -50,7 +54,7 @@ def report(self, date):
                 private_daily=node_qs.filter(private_query & created_today_query).count(),
             ),
             # Projects - the number of top-level only projects
-            projects=NodeRunningTotals(
+            projects=NodeRunningTotalsEs8(
                 total=node_qs.get_roots().count(),
                 total_excluding_spam=node_qs.get_roots().filter(exclude_spam).count(),
                 public=node_qs.filter(public_query).get_roots().count(),
@@ -61,7 +65,7 @@ def report(self, date):
                 private_daily=node_qs.filter(private_query & created_today_query).get_roots().count(),
             ),
             # Registered Nodes - the number of registered projects and components
-            registered_nodes=RegistrationRunningTotals(
+            registered_nodes=RegistrationRunningTotalsEs8(
                 total=registration_qs.count(),
                 public=registration_qs.filter(public_query).count(),
                 embargoed=registration_qs.filter(private_query).count(),
@@ -75,7 +79,7 @@ def report(self, date):
 
             ),
             # Registered Projects - the number of registered top level projects
-            registered_projects=RegistrationRunningTotals(
+            registered_projects=RegistrationRunningTotalsEs8(
                 total=registration_qs.get_roots().count(),
                 public=registration_qs.filter(public_query).get_roots().count(),
                 embargoed=registration_qs.filter(private_query).get_roots().count(),
@@ -88,5 +92,58 @@ def report(self, date):
                 withdrawn_daily=registration_qs.filter(retracted_query & retracted_today_query).get_roots().count(),
             ),
         )
+        reports.append(report_es8)
+        report = NodeSummaryReport(
+            report_date=date,
+            # Nodes - the number of projects and components
+            nodes=NodeRunningTotals(
+                total=report_es8.nodes.total,
+                total_excluding_spam=report_es8.nodes.total_excluding_spam,
+                public=report_es8.nodes.public,
+                private=report_es8.nodes.private,
+                total_daily=report_es8.nodes.total_daily,
+                total_daily_excluding_spam=report_es8.nodes.total_daily_excluding_spam,
+                public_daily=report_es8.nodes.public_daily,
+                private_daily=report_es8.nodes.private_daily,
+            ),
+            # Projects - the number of top-level only projects
+            projects=NodeRunningTotals(
+                total=report_es8.projects.total,
+                total_excluding_spam=report_es8.projects.total_excluding_spam,
+                public=report_es8.projects.public,
+                private=report_es8.projects.private,
+                total_daily=report_es8.projects.total_daily,
+                total_daily_excluding_spam=report_es8.projects.total_daily_excluding_spam,
+                public_daily=report_es8.projects.public_daily,
+                private_daily=report_es8.projects.private_daily,
+            ),
+            # Registered Nodes - the number of registered projects and components
+            registered_nodes=RegistrationRunningTotals(
+                total=report_es8.registered_nodes.total,
+                public=report_es8.registered_nodes.public,
+                embargoed=report_es8.registered_nodes.embargoed,
+                embargoed_v2=report_es8.registered_nodes.embargoed_v2,
+                withdrawn=report_es8.registered_nodes.withdrawn,
+                total_daily=report_es8.registered_nodes.total_daily,
+                public_daily=report_es8.registered_nodes.public_daily,
+                embargoed_daily=report_es8.registered_nodes.embargoed_daily,
+                embargoed_v2_daily=report_es8.registered_nodes.embargoed_v2_daily,
+                withdrawn_daily=report_es8.registered_nodes.withdrawn_daily,
+            ),
+            # Registered Projects - the number of registered top level projects
+            registered_projects=RegistrationRunningTotals(
+                total=report_es8.registered_projects.total,
+                public=report_es8.registered_projects.public,
+                embargoed=report_es8.registered_projects.embargoed,
+                embargoed_v2=report_es8.registered_projects.embargoed_v2,
+                withdrawn=report_es8.registered_projects.withdrawn,
+                total_daily=report_es8.registered_projects.total_daily,
+                public_daily=report_es8.registered_projects.public_daily,
+                embargoed_daily=report_es8.registered_projects.embargoed_daily,
+                embargoed_v2_daily=report_es8.registered_projects.embargoed_v2_daily,
+                withdrawn_daily=report_es8.registered_projects.withdrawn_daily,
+            ),
+        )
+        reports.append(report)
 
-        return [report]
+        return reports
diff --git a/osf/metrics/reporters/osfstorage_file_count.py b/osf/metrics/reporters/osfstorage_file_count.py
index 2f35e1e81fd..ea0f5e65b08 100644
--- a/osf/metrics/reporters/osfstorage_file_count.py
+++ b/osf/metrics/reporters/osfstorage_file_count.py
@@ -5,7 +5,10 @@
 from osf.metrics.reports import OsfstorageFileCountReport, FileRunningTotals
 from osf.models import AbstractNode, Preprint
 from ._base import DailyReporter
-
+from osf.metrics.es8_metrics import (
+    OsfstorageFileCountReportEs8,
+    FileRunningTotals as FileRunningTotalsEs8
+)
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -32,9 +35,11 @@ def report(self, date):
 
         daily_query = Q(created__date=date)
 
-        report = OsfstorageFileCountReport(
-            report_date=date,
-            files=FileRunningTotals(
+        reports = []
+
+        report_es8 = OsfstorageFileCountReportEs8(
+            cycle_coverage=f"{date:%Y.%m.%d}",
+            files=FileRunningTotalsEs8(
                 total=file_qs.count(),
                 public=file_qs.filter(public_query).count(),
                 private=file_qs.filter(private_query).count(),
@@ -43,5 +48,19 @@ def report(self, date):
                 private_daily=file_qs.filter(private_query & daily_query).count(),
             ),
         )
+        reports.append(report_es8)
+
+        report = OsfstorageFileCountReport(
+            report_date=date,
+            files=FileRunningTotals(
+                total=report_es8.files.total,
+                public=report_es8.files.public,
+                private=report_es8.files.private,
+                total_daily=report_es8.files.total_daily,
+                public_daily=report_es8.files.public_daily,
+                private_daily=report_es8.files.private_daily,
+            ),
+        )
+        reports.append(report)
 
-        return [report]
+        return reports
diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py
index 23f68bc7736..6d8caf2cc03 100644
--- a/osf/metrics/reporters/preprint_count.py
+++ b/osf/metrics/reporters/preprint_count.py
@@ -4,6 +4,7 @@
 from osf.metrics import PreprintSummaryReport
 from website import settings
 from ._base import DailyReporter
+from osf.metrics.es8_metrics import PreprintSummaryReportEs8
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -48,13 +49,20 @@ def report(self, date):
         for preprint_provider in PreprintProvider.objects.all():
             elastic_query = get_elastic_query(date, preprint_provider)
             resp = requests.post(f'{settings.SHARE_URL}api/v2/search/creativeworks/_search', json=elastic_query).json()
-            reports.append(
-                PreprintSummaryReport(
+
+            report_es8 = PreprintSummaryReportEs8(
+                cycle_coverage=f"{date:%Y.%m.%d}",
+                provider_key=preprint_provider._id,
+                preprint_count=resp['hits']['total'],
+            )
+            reports.append(report_es8)
+
+            report = PreprintSummaryReport(
                     report_date=date,
-                    provider_key=preprint_provider._id,
-                    preprint_count=resp['hits']['total'],
+                    provider_key=report_es8.provider_key,
+                    preprint_count=report_es8.preprint_count,
                 )
-            )
+            reports.append(report)
             logger.info('{} Preprints counted for the provider {}'.format(resp['hits']['total'], preprint_provider.name))
 
         return reports
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
index 40f259af325..cccd08eb49e 100644
--- a/osf/metrics/reporters/private_spam_metrics.py
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -2,6 +2,8 @@
 from osf.external.oopspam.client import OOPSpamClient
 from osf.external.askismet.client import AkismetClient
 from ._base import MonthlyReporter
+from osf.metrics.es8_metrics import PrivateSpamMetricsReportEs8
+
 
 class PrivateSpamMetricsReporter(MonthlyReporter):
     report_name = 'Private Spam Metrics'
@@ -13,7 +15,10 @@ def report(self):
         oopspam_client = OOPSpamClient()
         akismet_client = AkismetClient()
 
-        report = PrivateSpamMetricsReport(
+        reports = []
+
+        report_es8 = PrivateSpamMetricsReportEs8(
+            cycle_coverage=f"{self.yearmonth:%Y.%m}",
             report_yearmonth=str(self.yearmonth),
             node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'),
             node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'),
@@ -24,5 +29,19 @@ def report(self):
             preprint_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='preprint'),
             preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint')
         )
+        reports.append(report_es8)
+
+        report = PrivateSpamMetricsReport(
+            report_yearmonth=str(self.yearmonth),
+            node_oopspam_flagged=report_es8.node_oopspam_flagged,
+            node_oopspam_hammed=report_es8.node_oopspam_hammed,
+            node_akismet_flagged=report_es8.node_akismet_flagged,
+            node_akismet_hammed=report_es8.node_akismet_hammed,
+            preprint_oopspam_flagged=report_es8.preprint_oopspam_flagged,
+            preprint_oopspam_hammed=report_es8.preprint_oopspam_hammed,
+            preprint_akismet_flagged=report_es8.preprint_akismet_flagged,
+            preprint_akismet_hammed=report_es8.preprint_akismet_hammed,
+        )
+        reports.append(report)
 
-        return report
+        return reports
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index 7df405d385f..adec89de99e 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -3,6 +3,9 @@
 import typing
 
 import waffle
+
+from osf.metrics.es8_metrics import PublicItemUsageReportEs8
+
 if typing.TYPE_CHECKING:
     import elasticsearch6_dsl as edsl
 
@@ -131,16 +134,27 @@ def _preprintdownload_osfids(self, after_osfid: str | None) -> typing.Iterator[s
         )
         return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid)
 
-    def _init_report(self, osf_obj) -> PublicItemUsageReport:
+    def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | PublicItemUsageReportEs8]:
         if not _is_item_public(osf_obj):
             raise _SkipItem
-        return PublicItemUsageReport(
+        reports = []
+        report_es8 = PublicItemUsageReportEs8(
+            cycle_coverage=f"{self.yearmonth:%Y.%m}",
             item_osfid=osf_obj._id,
             item_type=[get_item_type(osf_obj)],
             provider_id=[get_provider_id(osf_obj)],
             platform_iri=[website_settings.DOMAIN],
+        )
+        reports.append(report_es8)
+        report = PublicItemUsageReport(
+            item_osfid=report_es8.item_osfid,
+            item_type=report_es8.item_type,
+            provider_id=report_es8.provider_id,
+            platform_iri=report_es8.platform_iri,
             # leave counts null; will be set if there's data
         )
+        reports.append(report)
+        return reports
 
     def _fill_report_counts(self, report, osf_obj):
         if (
@@ -154,31 +168,43 @@ def _fill_report_counts(self, report, osf_obj):
             (
                 report.view_count,
                 report.view_session_count,
-            ) = self._countedusage_view_counts(osf_obj)
+            ) = self._countedusage_view_counts(osf_obj, cumulative=False)
             (
                 report.download_count,
                 report.download_session_count,
-            ) = self._countedusage_download_counts(osf_obj)
+            ) = self._countedusage_download_counts(osf_obj, cumulative=False)
+
+            (
+                report.cumulative_view_count,
+                report.cumulative_view_session_count,
+            ) = self._countedusage_view_counts(osf_obj, cumulative=True)
+
+            (
+                report.cumulative_download_count,
+                report.cumulative_download_session_count,
+            ) = self._countedusage_download_counts(osf_obj, cumulative=True)
 
-    def _base_usage_search(self):
+    def _base_usage_search(self, cumulative: bool = False):
+        timestamp_filter = {
+                'lt': self.yearmonth.month_end(),
+            }
+        if not cumulative:
+            timestamp_filter['gte'] = self.yearmonth.month_start()
         return (
             CountedAuthUsage.search()
             .filter('term', item_public=True)
-            .filter('range', timestamp={
-                'gte': self.yearmonth.month_start(),
-                'lt': self.yearmonth.month_end(),
-            })
+            .filter('range', timestamp=timestamp_filter)
             .extra(size=0)  # only aggregations, no hits
         )
 
-    def _countedusage_view_counts(self, osf_obj) -> tuple[int, int]:
+    def _countedusage_view_counts(self, osf_obj, cumulative: bool = False) -> tuple[int, int]:
         '''compute view_session_count separately to avoid double-counting
 
         (the same session may be represented in both the composite agg on `item_guid`
         and that on `surrounding_guids`)
         '''
         _search = (
-            self._base_usage_search()
+            self._base_usage_search(cumulative=cumulative)
             .query(
                 'bool',
                 filter=[
@@ -206,10 +232,10 @@ def _countedusage_view_counts(self, osf_obj) -> tuple[int, int]:
         )
         return (_view_count, _view_session_count)
 
-    def _countedusage_download_counts(self, osf_obj) -> tuple[int, int]:
+    def _countedusage_download_counts(self, osf_obj, cumulative: bool = False) -> tuple[int, int]:
         '''aggregate downloads on each osfid (not including components/files)'''
         _search = (
-            self._base_usage_search()
+            self._base_usage_search(cumulative=cumulative)
             .filter('term', item_guid=osf_obj._id)
             .filter('term', action_labels=CountedAuthUsage.ActionLabel.DOWNLOAD.value)
         )
diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py
index 319381fe899..b6bc4b6e152 100644
--- a/osf/metrics/reporters/spam_count.py
+++ b/osf/metrics/reporters/spam_count.py
@@ -4,6 +4,7 @@
 from ._base import MonthlyReporter
 from osf.models import PreprintLog, NodeLog
 from osf.models.spam import SpamStatus
+from osf.metrics.es8_metrics import SpamSummaryReportEs8
 
 class SpamCountReporter(MonthlyReporter):
 
@@ -11,9 +12,9 @@ def report(self, **report_kwargs):
         assert not report_kwargs
         target_month = self.yearmonth.month_start()
         next_month = self.yearmonth.month_end()
-
-        return SpamSummaryReport(
-            # Node Log entries
+        reports = []
+        report_es8 = SpamSummaryReportEs8(
+            cycle_coverage=f"{self.yearmonth:%Y.%m}",
             node_confirmed_spam=NodeLog.objects.filter(
                 action=NodeLog.CONFIRM_SPAM,
                 created__gt=target_month,
@@ -79,3 +80,23 @@ def report(self, **report_kwargs):
                 created__lt=next_month,
             ).count()
         )
+        reports.append(report_es8)
+        report = SpamSummaryReport(
+            # Node Log entries
+            node_confirmed_spam=report_es8.node_confirmed_spam,
+            node_confirmed_ham=report_es8.node_confirmed_ham,
+            node_flagged=report_es8.node_flagged,
+            # Registration Log entries
+            registration_confirmed_spam=report_es8.registration_confirmed_spam,
+            registration_confirmed_ham=report_es8.registration_confirmed_ham,
+            registration_flagged=report_es8.registration_flagged,
+            # Preprint Log entries
+            preprint_confirmed_spam=report_es8.preprint_confirmed_spam,
+            preprint_confirmed_ham=report_es8.preprint_confirmed_ham,
+            preprint_flagged=report_es8.preprint_flagged,
+            # New Users marked as Spam/Ham
+            user_marked_as_spam=report_es8.user_marked_as_spam,
+            user_marked_as_ham=report_es8.user_marked_as_ham,
+        )
+        reports.append(report)
+        return reports
diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index 704254795f0..d8974c19cef 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -14,6 +14,11 @@
 from osf.models import SpamStatus, Tag
 from website import settings
 from ._base import DailyReporter
+from osf.metrics.es8_metrics import (
+    StorageAddonUsageEs8,
+    UsageByStorageAddon as UsageByStorageAddonEs8,
+    RunningTotal as RunningTotalEs8
+)
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -125,45 +130,84 @@ def report(self, date):
             if 'storage' in addon_config.categories
         }
 
+        usage_by_addon_es8 = []
         usage_by_addon = []
         for short_name, addon_config in storage_addon_configs.items():
             user_counts = storage_addon_user_counts(date, addon_config.get_model('UserSettings'))
             node_counts = storage_addon_node_counts(date, addon_config.get_model('NodeSettings'))
-
-            usage_by_addon.append(
-                UsageByStorageAddon(
-                    addon_shortname=short_name,
+            usage_by_storage_addon_es_8 = UsageByStorageAddonEs8(
+                addon_shortname=short_name,
+                enabled_usersettings=RunningTotalEs8(
+                    total=user_counts.get('enabled_total', 0),
+                    total_daily=user_counts.get('enabled_daily', 0),
+                ),
+                deleted_usersettings=RunningTotalEs8(
+                    total=user_counts.get('deleted_total', 0),
+                    total_daily=user_counts.get('deleted_daily', 0),
+                ),
+                linked_usersettings=RunningTotalEs8(
+                    total=user_counts.get('linked_total', 0),
+                    total_daily=user_counts.get('linked_daily', 0),
+                ),
+                usersetting_links=RunningTotalEs8(
+                    total=user_counts.get('link_count_total', 0),
+                    total_daily=user_counts.get('link_count_daily', 0),
+                ),
+                connected_nodesettings=RunningTotalEs8(
+                    total=node_counts.get('connected_total', 0),
+                    total_daily=node_counts.get('connected_daily', 0),
+                ),
+                disconnected_nodesettings=RunningTotalEs8(
+                    total=node_counts.get('disconnected_total', 0),
+                    total_daily=node_counts.get('disconnected_daily', 0),
+                ),
+                deleted_nodesettings=RunningTotalEs8(
+                    total=node_counts.get('deleted_total', 0),
+                    total_daily=node_counts.get('deleted_daily', 0),
+                ),
+            )
+            usage_by_addon_es8.append(usage_by_storage_addon_es_8)
+            usage_by_storage_addon = UsageByStorageAddon(
+                    addon_shortname=usage_by_storage_addon_es_8.short_name,
                     enabled_usersettings=RunningTotal(
-                        total=user_counts.get('enabled_total', 0),
-                        total_daily=user_counts.get('enabled_daily', 0),
+                        total=usage_by_storage_addon_es_8.enabled_usersettings.total,
+                        total_daily=usage_by_storage_addon_es_8.enabled_usersettings.total_daily,
                     ),
                     deleted_usersettings=RunningTotal(
-                        total=user_counts.get('deleted_total', 0),
-                        total_daily=user_counts.get('deleted_daily', 0),
+                        total=usage_by_storage_addon_es_8.deleted_usersettings.total,
+                        total_daily=usage_by_storage_addon_es_8.deleted_usersettings.total_daily,
                     ),
                     linked_usersettings=RunningTotal(
-                        total=user_counts.get('linked_total', 0),
-                        total_daily=user_counts.get('linked_daily', 0),
+                        total=usage_by_storage_addon_es_8.linked_usersettings.total,
+                        total_daily=usage_by_storage_addon_es_8.linked_usersettings.total_daily,
                     ),
                     usersetting_links=RunningTotal(
-                        total=user_counts.get('link_count_total', 0),
-                        total_daily=user_counts.get('link_count_daily', 0),
+                        total=usage_by_storage_addon_es_8.usersetting_links.total,
+                        total_daily=usage_by_storage_addon_es_8.usersetting_links.total_daily,
                     ),
                     connected_nodesettings=RunningTotal(
-                        total=node_counts.get('connected_total', 0),
-                        total_daily=node_counts.get('connected_daily', 0),
+                        total=usage_by_storage_addon_es_8.connected_nodesettings.total,
+                        total_daily=usage_by_storage_addon_es_8.connected_nodesettings.total_daily,
                     ),
                     disconnected_nodesettings=RunningTotal(
-                        total=node_counts.get('disconnected_total', 0),
-                        total_daily=node_counts.get('disconnected_daily', 0),
+                        total=usage_by_storage_addon_es_8.disconnected_nodesettings.total,
+                        total_daily=usage_by_storage_addon_es_8.disconnected_nodesettings.total_daily,
                     ),
                     deleted_nodesettings=RunningTotal(
-                        total=node_counts.get('deleted_total', 0),
-                        total_daily=node_counts.get('deleted_daily', 0),
-                    ),
+                        total=usage_by_storage_addon_es_8.deleted_nodesettings.total,
+                        total_daily=usage_by_storage_addon_es_8.deleted_nodesettings.total_daily,
+                    )
                 )
-            )
-        return [StorageAddonUsage(
+            usage_by_addon.append(usage_by_storage_addon)
+        reports = []
+        report_es8 = StorageAddonUsageEs8(
+            cycle_coverage=f"{date:%Y.%m.%d}",
+            usage_by_addon=usage_by_addon_es8,  # ES8 report carries the ES8 inner docs, not the ES6 ones
+        )
+        reports.append(report_es8)
+        report = StorageAddonUsage(
             report_date=date,
             usage_by_addon=usage_by_addon,
-        )]
+        )
+        reports.append(report)
+        return reports
diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py
index e0a61c7bb10..319100512f0 100644
--- a/osf/metrics/reporters/user_count.py
+++ b/osf/metrics/reporters/user_count.py
@@ -2,19 +2,32 @@
 
 from osf.metrics import UserSummaryReport
 from ._base import DailyReporter
+from osf.metrics.es8_metrics import UserSummaryReportEs8
 
 
 class UserCountReporter(DailyReporter):
 
     def report(self, report_date):
-        report = UserSummaryReport(
-            report_date=report_date,
+        reports = []
+        report_es8 = UserSummaryReportEs8(
+            cycle_coverage=f"{report_date:%Y.%m.%d}",
             active=OSFUser.objects.filter(is_active=True, date_confirmed__date__lte=report_date).count(),
-            deactivated=OSFUser.objects.filter(date_disabled__isnull=False, date_disabled__date__lte=report_date).count(),
+            deactivated=OSFUser.objects.filter(date_disabled__isnull=False,date_disabled__date__lte=report_date).count(),
             merged=OSFUser.objects.filter(date_registered__date__lte=report_date, merged_by__isnull=False).count(),
             new_users_daily=OSFUser.objects.filter(is_active=True, date_confirmed__date=report_date).count(),
             new_users_with_institution_daily=OSFUser.objects.filter(is_active=True, date_confirmed__date=report_date, institutionaffiliation__isnull=False).count(),
             unconfirmed=OSFUser.objects.filter(date_registered__date__lte=report_date, date_confirmed__isnull=True).count(),
         )
+        reports.append(report_es8)
+        report = UserSummaryReport(
+            report_date=report_date,
+            active=report_es8.active,
+            deactivated=report_es8.deactivated,
+            merged=report_es8.merged,
+            new_users_daily=report_es8.new_users_daily,
+            new_users_with_institution_daily=report_es8.new_users_with_institution_daily,
+            unconfirmed=report_es8.unconfirmed,
+        )
+        reports.append(report)
 
-        return [report]
+        return reports

From 59330e8b30b1114123b7d72dcafd1bbc867bfb02 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 00:49:32 +0300
Subject: [PATCH 070/100] flake8

---
 api/metrics/serializers.py                   | 10 +++-
 api_tests/metrics/test_counted_usage.py      | 13 +++++
 osf/metrics/reporters/download_count.py      |  2 +-
 osf/metrics/reporters/node_count.py          |  6 +-
 osf/metrics/reporters/preprint_count.py      | 10 ++--
 osf/metrics/reporters/public_item_usage.py   |  4 +-
 osf/metrics/reporters/storage_addon_usage.py | 58 ++++++++++----------
 osf/metrics/reporters/user_count.py          |  2 +-
 8 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 93c0c2b69bc..7bff222eec2 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -6,8 +6,10 @@
 from api.base.serializers import BaseAPISerializer
 from api.base.utils import absolute_reverse
 from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
-from osf.metrics.es8_metrics import OsfCountedUsageRecord
-from osf.models import Guid
+from osf.metrics.es8_metrics import (
+    OsfCountedUsageRecord,
+    PageviewInfo as PageviewInfoEs8
+)
 from website import settings as website_settings
 
 logger = logging.getLogger(__name__)
@@ -66,8 +68,10 @@ def validate(self, data):
 
     def create(self, validated_data):
         pageview_info = None
+        pageview_info_es8 = None
         if pageview_info_data := validated_data.get('pageview_info'):
             pageview_info = PageviewInfo(**pageview_info_data)
+            pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
         OsfCountedUsageRecord.record(
             platform_iri=website_settings.DOMAIN,
             provider_id=validated_data.get('provider_id'),
@@ -75,7 +79,7 @@ def create(self, validated_data):
             sessionhour_id=validated_data['session_id'],
             user_is_authenticated=validated_data['user_is_authenticated'],
             action_labels=validated_data.get('action_labels'),
-            pageview_info=pageview_info,
+            pageview_info=pageview_info_es8,
         )
 
         return CountedAuthUsage.record(
diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py
index e2cb7040037..7e770619c9f 100644
--- a/api_tests/metrics/test_counted_usage.py
+++ b/api_tests/metrics/test_counted_usage.py
@@ -11,6 +11,7 @@
     # UserFactory,
 )
 from api_tests.utils import create_test_file
+from elasticsearch_metrics.tests.util import djelme_test_backends
 
 
 COUNTED_USAGE_URL = '/_/metrics/events/counted_usage/'
@@ -69,6 +70,12 @@ def test_required_attributes(self, app, attrs):
 
 @pytest.mark.django_db
 class TestComputedFields:
+
+    @pytest.fixture(autouse=True)
+    def _real_elastic(self):
+        with djelme_test_backends():
+            yield
+
     @pytest.fixture(autouse=True)
     def mock_domain(self):
         domain = 'http://example.foo/'
@@ -213,6 +220,12 @@ def test_by_useragent_header(self, app, mock_save):
 @pytest.mark.parametrize('item_public', [True, False])
 @pytest.mark.django_db
 class TestGuidFields:
+
+    @pytest.fixture(autouse=True)
+    def _real_elastic(self):
+        with djelme_test_backends():
+            yield
+
     @pytest.fixture
     def preprint(self, item_public):
         return PreprintFactory(
diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py
index d6f6cea84af..dc1ab50d665 100644
--- a/osf/metrics/reporters/download_count.py
+++ b/osf/metrics/reporters/download_count.py
@@ -15,7 +15,7 @@ def report(self, date):
         reports.append(report_es8)
         report = DownloadCountReport(
             daily_file_downloads=report_es8.daily_file_downloads,
-             report_date=date,
+            report_date=date,
         )
         reports.append(report)
         return reports
diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py
index bb4dcbb165a..9b82f39bb89 100644
--- a/osf/metrics/reporters/node_count.py
+++ b/osf/metrics/reporters/node_count.py
@@ -11,7 +11,7 @@
 from osf.metrics.es8_metrics import (
     NodeSummaryReportEs8,
     NodeRunningTotals as NodeRunningTotalsEs8,
-RegistrationRunningTotals as RegistrationRunningTotalsEs8
+    RegistrationRunningTotals as RegistrationRunningTotalsEs8
 )
 
 logger = logging.getLogger(__name__)
@@ -119,7 +119,7 @@ def report(self, date):
             ),
             # Registered Nodes - the number of registered projects and components
             registered_nodes=RegistrationRunningTotals(
-               total=report_es8.registered_nodes.total,
+                total=report_es8.registered_nodes.total,
                 public=report_es8.registered_nodes.public,
                 embargoed=report_es8.registered_nodes.embargoed,
                 embargoed_v2=report_es8.registered_nodes.embargoed_v2,
@@ -132,7 +132,7 @@ def report(self, date):
             ),
             # Registered Projects - the number of registered top level projects
             registered_projects=RegistrationRunningTotals(
-              total=report_es8.registered_projects.total,
+                total=report_es8.registered_projects.total,
                 public=report_es8.registered_projects.public,
                 embargoed=report_es8.registered_projects.embargoed,
                 embargoed_v2=report_es8.registered_projects.embargoed_v2,
diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py
index 6d8caf2cc03..c592f53c268 100644
--- a/osf/metrics/reporters/preprint_count.py
+++ b/osf/metrics/reporters/preprint_count.py
@@ -57,11 +57,11 @@ def report(self, date):
             )
             reports.append(report_es8)
 
-            report =  PreprintSummaryReport(
-                    report_date=date,
-                    provider_key=report_es8.provider_key,
-                    preprint_count=report_es8.preprint_count,
-                )
+            report = PreprintSummaryReport(
+                report_date=date,
+                provider_key=report_es8.provider_key,
+                preprint_count=report_es8.preprint_count,
+            )
             reports.append(report)
             logger.info('{} Preprints counted for the provider {}'.format(resp['hits']['total'], preprint_provider.name))
 
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index adec89de99e..638a972a4f0 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -186,8 +186,8 @@ def _fill_report_counts(self, report, osf_obj):
 
     def _base_usage_search(self, cumulative: bool = False):
         timestamp_filter = {
-                'lt': self.yearmonth.month_end(),
-            }
+            'lt': self.yearmonth.month_end(),
+        }
         if not cumulative:
             timestamp_filter['gte'] = self.yearmonth.month_start()
         return (
diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index d8974c19cef..6656ee1e673 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -168,36 +168,36 @@ def report(self, date):
             )
             usage_by_addon_es8.append(usage_by_storage_addon_es_8)
             usage_by_storage_addon = UsageByStorageAddon(
-                    addon_shortname=usage_by_storage_addon_es_8.short_name,
-                    enabled_usersettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.enabled_usersettings.total,
-                        total_daily=usage_by_storage_addon_es_8.enabled_usersettings.total_daily,
-                    ),
-                    deleted_usersettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.deleted_usersettings.total,
-                        total_daily=usage_by_storage_addon_es_8.deleted_usersettings.total_daily,
-                    ),
-                    linked_usersettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.linked_usersettings.total,
-                        total_daily=usage_by_storage_addon_es_8.linked_usersettings.total_daily,
-                    ),
-                    usersetting_links=RunningTotal(
-                        total=usage_by_storage_addon_es_8.usersetting_links.total,
-                        total_daily=usage_by_storage_addon_es_8.usersetting_links.total_daily,
-                    ),
-                    connected_nodesettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.connected_nodesettings.total,
-                        total_daily=usage_by_storage_addon_es_8.connected_nodesettings.total_daily,
-                    ),
-                    disconnected_nodesettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.disconnected_nodesettings.total,
-                        total_daily=usage_by_storage_addon_es_8.disconnected_nodesettings.total_daily,
-                    ),
-                    deleted_nodesettings=RunningTotal(
-                        total=usage_by_storage_addon_es_8.deleted_nodesettings.total,
-                        total_daily=usage_by_storage_addon_es_8.deleted_nodesettings.total_daily,
-                    )
+                addon_shortname=usage_by_storage_addon_es_8.addon_shortname,
+                enabled_usersettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.enabled_usersettings.total,
+                    total_daily=usage_by_storage_addon_es_8.enabled_usersettings.total_daily,
+                ),
+                deleted_usersettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.deleted_usersettings.total,
+                    total_daily=usage_by_storage_addon_es_8.deleted_usersettings.total_daily,
+                ),
+                linked_usersettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.linked_usersettings.total,
+                    total_daily=usage_by_storage_addon_es_8.linked_usersettings.total_daily,
+                ),
+                usersetting_links=RunningTotal(
+                    total=usage_by_storage_addon_es_8.usersetting_links.total,
+                    total_daily=usage_by_storage_addon_es_8.usersetting_links.total_daily,
+                ),
+                connected_nodesettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.connected_nodesettings.total,
+                    total_daily=usage_by_storage_addon_es_8.connected_nodesettings.total_daily,
+                ),
+                disconnected_nodesettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.disconnected_nodesettings.total,
+                    total_daily=usage_by_storage_addon_es_8.disconnected_nodesettings.total_daily,
+                ),
+                deleted_nodesettings=RunningTotal(
+                    total=usage_by_storage_addon_es_8.deleted_nodesettings.total,
+                    total_daily=usage_by_storage_addon_es_8.deleted_nodesettings.total_daily,
                 )
+            )
             usage_by_addon.append(usage_by_storage_addon)
         reports = []
         report_es8 = StorageAddonUsageEs8(
diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py
index 319100512f0..80a9ac1e31e 100644
--- a/osf/metrics/reporters/user_count.py
+++ b/osf/metrics/reporters/user_count.py
@@ -12,7 +12,7 @@ def report(self, report_date):
         report_es8 = UserSummaryReportEs8(
             cycle_coverage=f"{report_date:%Y.%m.%d}",
             active=OSFUser.objects.filter(is_active=True, date_confirmed__date__lte=report_date).count(),
-            deactivated=OSFUser.objects.filter(date_disabled__isnull=False,date_disabled__date__lte=report_date).count(),
+            deactivated=OSFUser.objects.filter(date_disabled__isnull=False, date_disabled__date__lte=report_date).count(),
             merged=OSFUser.objects.filter(date_registered__date__lte=report_date, merged_by__isnull=False).count(),
             new_users_daily=OSFUser.objects.filter(is_active=True, date_confirmed__date=report_date).count(),
             new_users_with_institution_daily=OSFUser.objects.filter(is_active=True, date_confirmed__date=report_date, institutionaffiliation__isnull=False).count(),

From 3bbace3996ff692e99d561dd2c031205fd3f403f Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 14:28:15 +0300
Subject: [PATCH 071/100] modify tests

---
 api_tests/metrics/test_counted_usage.py | 45 +++++++++++++++++--------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py
index 7e770619c9f..40c9f6e6aac 100644
--- a/api_tests/metrics/test_counted_usage.py
+++ b/api_tests/metrics/test_counted_usage.py
@@ -88,15 +88,22 @@ def mock_now(self):
         with mock.patch('django.utils.timezone.now', return_value=timestamp):
             yield timestamp
 
+    @pytest.fixture
+    def preprint(self, request):
+        return PreprintFactory(
+            is_public=True,
+            is_published=True,
+        )
+
     @pytest.fixture()
     def user(self):
         with mock.patch('osf.models.base.generate_guid', return_value='guidy'):
             return AuthUserFactory()
 
-    def test_by_client_session_id(self, app, mock_save, user):
+    def test_by_client_session_id(self, app, mock_save, user, preprint):
         payload = counted_usage_payload(
             client_session_id='hello',
-            item_guid='zyxwv',
+            item_guid=preprint._id,
             action_labels=['view', 'api'],
             pageview_info={'page_url': 'http://example.foo/blahblah/blee'},
         )
@@ -111,7 +118,7 @@ def test_by_client_session_id(self, app, mock_save, user):
             expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2',
             expected_attrs={
                 'platform_iri': 'http://example.foo/',
-                'item_guid': 'zyxwv',
+                'item_guid': preprint._id,
                 # session_id: sha256(b'hello|1981-01-01').hexdigest()
                 'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
                 'action_labels': ['view', 'api'],
@@ -123,10 +130,10 @@ def test_by_client_session_id(self, app, mock_save, user):
             },
         )
 
-    def test_by_client_session_id_anon(self, app, mock_save):
+    def test_by_client_session_id_anon(self, app, mock_save, preprint):
         payload = counted_usage_payload(
             client_session_id='hello',
-            item_guid='zyxwv',
+            item_guid=preprint._id,
             action_labels=['view', 'web'],
             pageview_info={
                 'page_url': 'http://example.foo/bliz/',
@@ -144,7 +151,7 @@ def test_by_client_session_id_anon(self, app, mock_save):
             expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82',
             expected_attrs={
                 'platform_iri': 'http://example.foo/',
-                'item_guid': 'zyxwv',
+                'item_guid': preprint._id,
                 # session_id: sha256(b'hello|1981-01-01').hexdigest()
                 'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
                 'action_labels': ['view', 'web'],
@@ -158,9 +165,9 @@ def test_by_client_session_id_anon(self, app, mock_save):
             },
         )
 
-    def test_by_user_auth(self, app, mock_save, user):
+    def test_by_user_auth(self, app, mock_save, user, preprint):
         payload = counted_usage_payload(
-            item_guid='yxwvu',
+            item_guid=preprint._id,
             action_labels=['view', 'web'],
             pageview_info={
                 'page_url': 'http://osf.io/mst3k',
@@ -178,7 +185,7 @@ def test_by_user_auth(self, app, mock_save, user):
             expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2',
             expected_attrs={
                 'platform_iri': 'http://example.foo/',
-                'item_guid': 'yxwvu',
+                'item_guid': preprint._id,
                 # session_id: sha256(b'guidy|1981-01-01|0').hexdigest()
                 'session_id': 'ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a',
                 'action_labels': ['view', 'web'],
@@ -192,10 +199,14 @@ def test_by_user_auth(self, app, mock_save, user):
             },
         )
 
-    def test_by_useragent_header(self, app, mock_save):
+    def test_by_useragent_header(self, app, mock_save, preprint):
         payload = counted_usage_payload(
-            item_guid='yxwvu',
+            item_guid=preprint._id,
             action_labels=['view', 'api'],
+            pageview_info={
+                'page_url': 'http://example.foo/bliz/',
+                'referer_url': 'http://elsewhere.baz/index.php',
+            },
         )
         headers = {
             'User-Agent': 'haha',
@@ -205,14 +216,20 @@ def test_by_useragent_header(self, app, mock_save):
         assert_saved_with(
             mock_save,
             # doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest()
-            expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7',
+            expected_doc_id='6d7549df6734bb955eb832c6316ffae46c2959c95b5817ab4fcb341dbc875c23',
             expected_attrs={
                 'platform_iri': 'http://example.foo/',
-                'item_guid': 'yxwvu',
+                'item_guid': preprint._id,
                 # session_id: sha256(b'localhost:80|haha|1981-01-01|0').hexdigest()
                 'session_id': '97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a',
                 'action_labels': ['view', 'api'],
-                'pageview_info': None,
+                'pageview_info': {
+                    'page_url': 'http://example.foo/bliz/',
+                    'page_path': '/bliz',
+                    'referer_url': 'http://elsewhere.baz/index.php',
+                    'referer_domain': 'elsewhere.baz',
+                    'hour_of_day': 0,
+                },
             },
         )
 

From bd2e7d44decf62d7e3c9f67164fda1ff1848368a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 15:22:33 +0300
Subject: [PATCH 072/100] flake8: fix dict entry indentation in _base_usage_search

---
 osf/metrics/reporters/public_item_usage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index 638a972a4f0..ff407473c0a 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -186,7 +186,7 @@ def _fill_report_counts(self, report, osf_obj):
 
     def _base_usage_search(self, cumulative: bool = False):
         timestamp_filter = {
-             'lt': self.yearmonth.month_end(),
+            'lt': self.yearmonth.month_end(),
         }
         if not cumulative:
             timestamp_filter['gte'] = self.yearmonth.month_start()

From 25bdadd50568b83fb2477f65d44868f3a5a6d421 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 15:42:18 +0300
Subject: [PATCH 073/100] flake8: add trailing comma to es8_metrics import

---
 api/metrics/serializers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 7bff222eec2..120a82bb7b4 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -8,7 +8,7 @@
 from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
 from osf.metrics.es8_metrics import (
     OsfCountedUsageRecord,
-    PageviewInfo as PageviewInfoEs8
+    PageviewInfo as PageviewInfoEs8,
 )
 from website import settings as website_settings
 

From f3729bca808657c281d19d0851b8644de88068d7 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 18:02:15 +0300
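Subject: [PATCH 074/100] fix reporters to pass tests

report() in the institution-summary-monthly, institutional-users and
public-item-usage reporters now picks the non-Es8 report out of the
generated list. The Es8 reports' cycle_coverage is built as
"{year}.{month}" instead of a strftime-style format spec.
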
---
 osf/metrics/reporters/institution_summary_monthly.py | 6 ++++--
 osf/metrics/reporters/institutional_users.py         | 5 +++--
 osf/metrics/reporters/private_spam_metrics.py        | 2 +-
 osf/metrics/reporters/public_item_usage.py           | 5 +++--
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index bf029aeff12..566f692f21d 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -21,7 +21,9 @@ def iter_report_kwargs(self, continue_after: dict | None = None):
 
     def report(self, **report_kwargs):
         _institution = Institution.objects.get(pk=report_kwargs['institution_pk'])
-        return self.generate_report(_institution)
+        reports =  self.generate_report(_institution)
+        _report = next(r for r in reports if isinstance(r, InstitutionMonthlySummaryReport))
+        return _report
 
     def generate_report(self, institution):
         node_queryset = institution.nodes.filter(
@@ -34,7 +36,7 @@ def generate_report(self, institution):
         preprint_queryset = self.get_published_preprints(institution, self.yearmonth)
         reports = []
         report_es8 = InstitutionMonthlySummaryReportEs8(
-            cycle_coverage=f"{self.yearmonth:%Y.%m.%d}",
+            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
             institution_id=institution._id,
             user_count=institution.get_institution_users().count(),
             private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False),
diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py
index 1a1289657b3..199b9c36e7c 100644
--- a/osf/metrics/reporters/institutional_users.py
+++ b/osf/metrics/reporters/institutional_users.py
@@ -40,7 +40,8 @@ def report(self, **report_kwargs):
         _institution = osfdb.Institution.objects.get(pk=report_kwargs['institution_pk'])
         _user = osfdb.OSFUser.objects.get(pk=report_kwargs['user_pk'])
         _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth)
-        return _helper.reports
+        _report = next(r for r in _helper.reports if isinstance(r, InstitutionalUserReport))
+        return _report
 
 
 # helper
@@ -55,7 +56,7 @@ def __post_init__(self):
         _affiliation = self.user.get_institution_affiliation(self.institution._id)
         self.reports = []
         report_es8 = InstitutionalUserReportEs8(
-            cycle_coverage=f"{self.yearmonth:%Y.%m.%d}",
+            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
             institution_id=self.institution._id,
             user_id=self.user._id,
             user_name=self.user.fullname,
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
index cccd08eb49e..6ffdcf5552a 100644
--- a/osf/metrics/reporters/private_spam_metrics.py
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -18,7 +18,7 @@ def report(self):
         reports = []
 
         report_es8 = PrivateSpamMetricsReportEs8(
-            cycle_coverage=f"{self.yearmonth:%Y.%m.1}",
+            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
             report_yearmonth=str(self.yearmonth),
             node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'),
             node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'),
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index ff407473c0a..8b32777730b 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -64,7 +64,8 @@ def report(self, **report_kwargs):
             if _guid is None or _guid.referent is None:
                 raise _SkipItem
             _obj = _guid.referent
-            _report = self._init_report(_obj)
+            _reports = self._init_report(_obj)
+            _report = next(r for r in _reports if isinstance(r, PublicItemUsageReport))
             self._fill_report_counts(_report, _obj)
             if not any((
                 _report.view_count,
@@ -139,7 +140,7 @@ def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | PublicIte
             raise _SkipItem
         reports = []
         report_es8 = PublicItemUsageReportEs8(
-            cycle_coverage=f"{self.yearmonth:%Y.%m.%d}",
+            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
             item_osfid=osf_obj._id,
             item_type=[get_item_type(osf_obj)],
             provider_id=[get_provider_id(osf_obj)],

From 3481f14c75100bccbe6b592e39513eaf0b87840a Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Fri, 24 Apr 2026 23:47:38 +0300
Subject: [PATCH 075/100] flake8: remove extra space after '=' in report()

---
 osf/metrics/reporters/institution_summary_monthly.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index 566f692f21d..fad2fc592bf 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -21,7 +21,7 @@ def iter_report_kwargs(self, continue_after: dict | None = None):
 
     def report(self, **report_kwargs):
         _institution = Institution.objects.get(pk=report_kwargs['institution_pk'])
-        reports =  self.generate_report(_institution)
+        reports = self.generate_report(_institution)
         _report = next(r for r in reports if isinstance(r, InstitutionMonthlySummaryReport))
         return _report
 

From 0d3164bf1c42503abdc05038a54459a24fa876cd Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 27 Apr 2026 14:36:48 +0300
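Subject: [PATCH 076/100] improve reporter tests

report() in the institution-summary-monthly and public-item-usage
reporters now returns the whole list of generated reports (the
public-item-usage reporter fills counts on each one). The reporter
tests select the report types they check with isinstance.
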
---
 .../reporters/institution_summary_monthly.py  |  3 +--
 osf/metrics/reporters/private_spam_metrics.py |  1 -
 osf/metrics/reporters/public_item_usage.py    | 20 +++++++++----------
 .../test_institutional_summary_reporter.py    | 13 +++++++-----
 .../test_public_item_usage_reporter.py        |  6 ++++--
 osf_tests/metrics/test_spam_count_reporter.py |  4 +++-
 6 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index fad2fc592bf..12bb3cfda50 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -22,8 +22,7 @@ def iter_report_kwargs(self, continue_after: dict | None = None):
     def report(self, **report_kwargs):
         _institution = Institution.objects.get(pk=report_kwargs['institution_pk'])
         reports = self.generate_report(_institution)
-        _report = next(r for r in reports if isinstance(r, InstitutionMonthlySummaryReport))
-        return _report
+        return reports
 
     def generate_report(self, institution):
         node_queryset = institution.nodes.filter(
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
index 6ffdcf5552a..a857ed1b3dc 100644
--- a/osf/metrics/reporters/private_spam_metrics.py
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -4,7 +4,6 @@
 from ._base import MonthlyReporter
 from osf.metrics.es8_metrics import PrivateSpamMetricsReportEs8
 
-
 class PrivateSpamMetricsReporter(MonthlyReporter):
     report_name = 'Private Spam Metrics'
 
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index 8b32777730b..9deb8db6a18 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -65,16 +65,16 @@ def report(self, **report_kwargs):
                 raise _SkipItem
             _obj = _guid.referent
             _reports = self._init_report(_obj)
-            _report = next(r for r in _reports if isinstance(r, PublicItemUsageReport))
-            self._fill_report_counts(_report, _obj)
-            if not any((
-                _report.view_count,
-                _report.view_session_count,
-                _report.download_count,
-                _report.download_session_count,
-            )):
-                raise _SkipItem
-            return _report
+            for _report in _reports:
+                self._fill_report_counts(_report, _obj)
+                if not any((
+                    _report.view_count,
+                    _report.view_session_count,
+                    _report.download_count,
+                    _report.download_session_count,
+                )):
+                    raise _SkipItem
+            return _reports
         except _SkipItem:
             return None
 
diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
index 05baa4d38e7..32c899dd6f4 100644
--- a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
+++ b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
@@ -3,6 +3,7 @@
 import logging
 from django.test import TestCase
 from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter
+from osf.metrics.reports import InstitutionMonthlySummaryReport
 from osf.metrics.utils import YearMonth
 from osf_tests.factories import (
     InstitutionFactory,
@@ -79,10 +80,10 @@ def _create_active_user(cls, institution, date_confirmed):
 
     def test_report_generation(self):
         reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
-        reports = list_monthly_reports(reporter)
-        self.assertEqual(len(reports), 1)
+        reports_raw = list_monthly_reports(reporter)
+        self.assertEqual(len(reports_raw[0]), 2)
 
-        report = reports[0]
+        report = next(r for r in reports_raw[0] if isinstance(r, InstitutionMonthlySummaryReport))
         self.assertEqual(report.institution_id, self._institution._id)
         self.assertEqual(report.user_count, 2)  # _logged_in_user and _active_user
         self.assertEqual(report.public_project_count, 1)
@@ -115,7 +116,8 @@ def test_report_generation_multiple_institutions(self):
 
         # Run the reporter for the current month (February 2018)
         reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
-        reports = list_monthly_reports(reporter)
+        reports_raw= list_monthly_reports(reporter)
+        reports = [item for sublist in reports_raw for item in sublist if isinstance(item, InstitutionMonthlySummaryReport)]
         self.assertEqual(len(reports), 3)  # Reports for self._institution, institution2, institution3
 
         # Extract reports by institution
@@ -264,7 +266,8 @@ def test_high_counts_multiple_institutions(self):
         if enable_benchmarking:
             reporter_start_time = time.time()
         reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
-        reports = list_monthly_reports(reporter)
+        reports_raw = list_monthly_reports(reporter)
+        reports = [item for sublist in reports_raw for item in sublist if isinstance(item, InstitutionMonthlySummaryReport)]
         assert len(reports) == additional_institution_count + 1
 
         if enable_benchmarking:
diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py
index 69bd266285a..082b330afd8 100644
--- a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py
+++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py
@@ -174,8 +174,10 @@ def test_no_data(self, ym_empty):
 
     def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2, item0):
         _empty = list_monthly_reports(PublicItemUsageReporter(ym_empty))
-        _sparse = list_monthly_reports(PublicItemUsageReporter(ym_sparse))
-        _busy = list_monthly_reports(PublicItemUsageReporter(ym_busy))
+        _sparse_raw = list_monthly_reports(PublicItemUsageReporter(ym_sparse))
+        _sparse = [item for sublist in _sparse_raw for item in sublist if isinstance(item, PublicItemUsageReport)]
+        _busy_raw = list_monthly_reports(PublicItemUsageReporter(ym_busy))
+        _busy = [item for sublist in _busy_raw for item in sublist if isinstance(item, PublicItemUsageReport)]
 
         # empty month:
         assert _empty == []
diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py
index 0e7ba6956bf..448a8136f7a 100644
--- a/osf_tests/metrics/test_spam_count_reporter.py
+++ b/osf_tests/metrics/test_spam_count_reporter.py
@@ -1,6 +1,7 @@
 import pytest
 from datetime import datetime
 from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter
+from osf.metrics.reports import PrivateSpamMetricsReport
 from osf.metrics.utils import YearMonth
 from osf_tests.factories import NodeLogFactory, NodeFactory
 from unittest.mock import patch
@@ -30,7 +31,8 @@ def test_private_spam_metrics_reporter():
         mock_akismet_get_hammed_count.return_value = 10
 
         reporter = PrivateSpamMetricsReporter(report_yearmonth)
-        report = reporter.report()
+        reports_raw = reporter.report()
+        report = next(r for r in reports_raw if isinstance(r, PrivateSpamMetricsReport))
 
         assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}"
         assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}"

From 08258b788143199078a8d20db0751c6cac781d25 Mon Sep 17 00:00:00 2001
From: Bohdan Odintsov <bodintsov@exoft.net>
Date: Mon, 27 Apr 2026 15:13:57 +0300
Subject: [PATCH 077/100] flake8: add missing space before '=' in reporter test

---
 .../metrics/reporters/test_institutional_summary_reporter.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
index 32c899dd6f4..f40b5dacec6 100644
--- a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
+++ b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py
@@ -116,7 +116,7 @@ def test_report_generation_multiple_institutions(self):
 
         # Run the reporter for the current month (February 2018)
         reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth)
-        reports_raw= list_monthly_reports(reporter)
+        reports_raw = list_monthly_reports(reporter)
         reports = [item for sublist in reports_raw for item in sublist if isinstance(item, InstitutionMonthlySummaryReport)]
         self.assertEqual(len(reports), 3)  # Reports for self._institution, institution2, institution3
 

From cb5797bf8680ff183d96c7ad2549afba84cad212 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Mon, 27 Apr 2026 09:20:23 -0400
Subject: [PATCH 078/100] better match elasticsearch_metrics changes

---
 addons/base/views.py                          | 31 ++++++++++---------
 api/metrics/serializers.py                    |  5 +--
 osf/metrics/es8_metrics.py                    | 27 +++++++++++-----
 osf/metrics/reporters/download_count.py       |  5 +--
 osf/metrics/reporters/institution_summary.py  |  5 +--
 .../reporters/institution_summary_monthly.py  |  6 ++--
 osf/metrics/reporters/institutional_users.py  |  6 ++--
 osf/metrics/reporters/new_user_domain.py      |  5 +--
 osf/metrics/reporters/node_count.py           |  5 +--
 .../reporters/osfstorage_file_count.py        |  5 +--
 osf/metrics/reporters/preprint_count.py       |  5 +--
 osf/metrics/reporters/private_spam_metrics.py |  7 +++--
 osf/metrics/reporters/public_item_usage.py    |  4 +--
 osf/metrics/reporters/spam_count.py           |  5 +--
 osf/metrics/reporters/storage_addon_usage.py  |  5 +--
 osf/metrics/reporters/user_count.py           |  5 +--
 osf/metrics/utils.py                          | 22 +++++++++++++
 osf_tests/metrics/test_utils.py               | 23 +++++++++++---
 18 files changed, 117 insertions(+), 59 deletions(-)

diff --git a/addons/base/views.py b/addons/base/views.py
index 4352eb65f30..1e4a8433469 100644
--- a/addons/base/views.py
+++ b/addons/base/views.py
@@ -693,13 +693,15 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
                 path=file_node.path,
             )
             OsfCountedUsageRecord.record(
-                count=1,
-                preprint_id=resource._id,
-                user_id=getattr(auth.user, '_id', None),
-                provider_id=resource.provider._id,
-                database_iri=resource.get_semantic_iri(),
-                version=fileversion.identifier,
-                path=file_node.path,
+                user_id=getattr(user, '_id', None),
+                item_osfid=resource._id,
+                action_labels=[
+                    OsfCountedUsageRecord.ActionLabel.VIEW.value,
+                    OsfCountedUsageRecord.ActionLabel.WEB.value,
+                ],
+                # HACK: we don't have the user request, so fabricate a one-off session id
+                # (this means no double-click filtering and inflated "unique" view counts)
+                client_session_id=str(uuid.uuid4()),
             )
         except es_exceptions.ConnectionError:
             log_exception()
@@ -729,13 +731,14 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
                 path=file_node.path,
             )
             OsfCountedUsageRecord.record(
-                count=1,
-                preprint_id=resource._id,
-                user_id=getattr(auth.user, '_id', None),
-                provider_id=resource.provider._id,
-                database_iri=resource.get_semantic_iri(),
-                version=fileversion.identifier,
-                path=file_node.path,
+                user_id=getattr(user, '_id', None),
+                item_osfid=resource._id,
+                action_labels=[
+                    OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value,
+                ],
+                # HACK: we don't have the user request, so fabricate a one-off session id
+                # (this means no double-click filtering and inflated "unique" download counts)
+                client_session_id=str(uuid.uuid4()),
             )
         except es_exceptions.ConnectionError:
             log_exception()
diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 120a82bb7b4..79c0f9ecc6d 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -73,11 +73,8 @@ def create(self, validated_data):
             pageview_info = PageviewInfo(**pageview_info_data)
             pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
         OsfCountedUsageRecord.record(
-            platform_iri=website_settings.DOMAIN,
-            provider_id=validated_data.get('provider_id'),
             item_osfid=validated_data.get('item_guid'),
-            sessionhour_id=validated_data['session_id'],
-            user_is_authenticated=validated_data['user_is_authenticated'],
+            client_session_id=validated_data['session_id'],
             action_labels=validated_data.get('action_labels'),
             pageview_info=pageview_info_es8,
         )
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 7118ab3cc28..2ab6ec022ec 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -111,6 +111,20 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     class Meta:
         timeseries_index_timedepth = MONTHLY
 
+    class ActionLabel(enum.Enum):
+        SEARCH = 'search'  # counter:Search
+        VIEW = 'view'  # counter:Investigation
+        DOWNLOAD = 'download'  # counter:Request
+        WEB = 'web'  # counter:Regular (aka "pageview")
+        API = 'api'  # counter:TDM (aka "non-web api usage")
+
+    @classmethod
+    def record(cls, **kwargs):
+        # autofill `user_is_authenticated` before `user_id` discarded (couldn't in `clean`)
+        if 'user_is_authenticated' not in kwargs:
+            kwargs['user_is_authenticated'] = bool(kwargs.get('user_id'))
+        return super().record(**kwargs)
+
     @functools.cached_property
     def _osfid_referent(self):
         # for use by autofill methods, if needed
@@ -118,6 +132,7 @@ def _osfid_referent(self):
 
     def clean(self):
         super().clean()
+        self._autofill_platform_iri()
         self._autofill_item_iri_and_osfid()
         self._autofill_item_public()
         self._autofill_item_type()
@@ -126,6 +141,10 @@ def clean(self):
         self._autofill_pageview()
         self._autofill_database_iri()
 
+    def _autofill_platform_iri(self):
+        if self.platform_iri is None:
+            self.platform_iri = website_settings.DOMAIN
+
     def _autofill_item_iri_and_osfid(self):
         if self.item_osfid and not self.item_iri:
             self.item_iri = osf_iri(self.item_osfid)
@@ -227,14 +246,6 @@ def _get_unique_together_values(self):
         )
 
 
-class ActionLabel(enum.Enum):
-    SEARCH = 'search'  # counter:Search
-    VIEW = 'view'  # counter:Investigation
-    DOWNLOAD = 'download'  # counter:Request
-    WEB = 'web'  # counter:Regular (aka "pageview")
-    API = 'api'  # counter:TDM (aka "non-web api usage")
-
-
 class RegistriesModerationMetricsEs8(djelme.EventRecord):
     registration_id: str
     provider_id: str
diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py
index dc1ab50d665..bbc6d0cb655 100644
--- a/osf/metrics/reporters/download_count.py
+++ b/osf/metrics/reporters/download_count.py
@@ -1,7 +1,8 @@
 from osf.models import PageCounter
 from osf.metrics.reports import DownloadCountReport
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import DownloadCountReportEs8
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 
 class DownloadCountReporter(DailyReporter):
@@ -9,7 +10,7 @@ def report(self, date):
         download_count = int(PageCounter.get_all_downloads_on_date(date) or 0)
         reports = []
         report_es8 = DownloadCountReportEs8(
-            cycle_coverage=f"{date:%Y.%m.%d}",
+            cycle_coverage=cycle_coverage_date(date),
             daily_file_downloads=download_count,
         )
         reports.append(report_es8)
diff --git a/osf/metrics/reporters/institution_summary.py b/osf/metrics/reporters/institution_summary.py
index d691dd8b803..1a6cfbbbca7 100644
--- a/osf/metrics/reporters/institution_summary.py
+++ b/osf/metrics/reporters/institution_summary.py
@@ -9,13 +9,14 @@
     RegistrationRunningTotals,
 )
 from osf.models import Institution
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import (
     InstitutionSummaryReportEs8,
     RunningTotal as RunningTotalEs8,
     NodeRunningTotals as NodeRunningTotalsEs8,
     RegistrationRunningTotals as RegistrationRunningTotalsEs8
 )
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -45,7 +46,7 @@ def report(self, date):
                 type='osf.registration',
             )
             report_es8 = InstitutionSummaryReportEs8(
-                cycle_coverage=f"{date:%Y.%m.%d}",
+                cycle_coverage=cycle_coverage_date(date),
                 institution_id=institution._id,
                 institution_name=institution.name,
                 users=RunningTotalEs8(
diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index 12bb3cfda50..1f9afaaf7f7 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -5,9 +5,9 @@
 from osf.models.spam import SpamStatus
 from addons.osfstorage.models import OsfStorageFile
 from osf.metrics.reports import InstitutionMonthlySummaryReport
-from ._base import MonthlyReporter
 from osf.metrics.es8_metrics import InstitutionMonthlySummaryReportEs8
-
+from osf.metrics.utils import cycle_coverage_yearmonth
+from ._base import MonthlyReporter
 
 class InstitutionalSummaryMonthlyReporter(MonthlyReporter):
     """Generate an InstitutionMonthlySummaryReport for each institution."""
@@ -35,7 +35,7 @@ def generate_report(self, institution):
         preprint_queryset = self.get_published_preprints(institution, self.yearmonth)
         reports = []
         report_es8 = InstitutionMonthlySummaryReportEs8(
-            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
+            cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             institution_id=institution._id,
             user_count=institution.get_institution_users().count(),
             private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False),
diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py
index 199b9c36e7c..ae36e59196c 100644
--- a/osf/metrics/reporters/institutional_users.py
+++ b/osf/metrics/reporters/institutional_users.py
@@ -8,9 +8,9 @@
 from osf.models.spam import SpamStatus
 from addons.osfstorage.models import OsfStorageFile
 from osf.metrics.reports import InstitutionalUserReport
-from osf.metrics.utils import YearMonth
-from ._base import MonthlyReporter
+from osf.metrics.utils import YearMonth, cycle_coverage_yearmonth
 from osf.metrics.es8_metrics import InstitutionalUserReportEs8
+from ._base import MonthlyReporter
 
 
 class InstitutionalUsersReporter(MonthlyReporter):
@@ -56,7 +56,7 @@ def __post_init__(self):
         _affiliation = self.user.get_institution_affiliation(self.institution._id)
         self.reports = []
         report_es8 = InstitutionalUserReportEs8(
-            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
+            cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             institution_id=self.institution._id,
             user_id=self.user._id,
             user_name=self.user.fullname,
diff --git a/osf/metrics/reporters/new_user_domain.py b/osf/metrics/reporters/new_user_domain.py
index d6bdec2aa6e..a19abaeb22f 100644
--- a/osf/metrics/reporters/new_user_domain.py
+++ b/osf/metrics/reporters/new_user_domain.py
@@ -3,8 +3,9 @@
 
 from osf.models import OSFUser
 from osf.metrics.reports import NewUserDomainReport
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import NewUserDomainReportEs8
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -24,7 +25,7 @@ def report(self, date):
         reports = []
         for domain_name, count in domain_names.items():
             report_es8 = NewUserDomainReportEs8(
-                cycle_coverage=f"{date:%Y.%m.%d}",
+                cycle_coverage=cycle_coverage_date(date),
                 domain_name=domain_name,
                 new_user_count=count,
             )
diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py
index 9b82f39bb89..8e9842ae78e 100644
--- a/osf/metrics/reporters/node_count.py
+++ b/osf/metrics/reporters/node_count.py
@@ -7,12 +7,13 @@
     NodeRunningTotals,
     RegistrationRunningTotals,
 )
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import (
     NodeSummaryReportEs8,
     NodeRunningTotals as NodeRunningTotalsEs8,
     RegistrationRunningTotals as RegistrationRunningTotalsEs8
 )
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -41,7 +42,7 @@ def report(self, date):
         exclude_spam = ~Q(spam_status__in=[SpamStatus.SPAM, SpamStatus.FLAGGED])
         reports = []
         report_es8 = NodeSummaryReportEs8(
-            cycle_coverage=f"{date:%Y.%m.%d}",
+            cycle_coverage=cycle_coverage_date(date),
             # Nodes - the number of projects and components
             nodes=NodeRunningTotalsEs8(
                 total=node_qs.count(),
diff --git a/osf/metrics/reporters/osfstorage_file_count.py b/osf/metrics/reporters/osfstorage_file_count.py
index ea0f5e65b08..f93ed180ebb 100644
--- a/osf/metrics/reporters/osfstorage_file_count.py
+++ b/osf/metrics/reporters/osfstorage_file_count.py
@@ -4,11 +4,12 @@
 
 from osf.metrics.reports import OsfstorageFileCountReport, FileRunningTotals
 from osf.models import AbstractNode, Preprint
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import (
     OsfstorageFileCountReportEs8,
     FileRunningTotals as FileRunningTotalsEs8
 )
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -38,7 +39,7 @@ def report(self, date):
         reports = []
 
         report_es8 = OsfstorageFileCountReportEs8(
-            cycle_coverage=f"{date:%Y.%m.%d}",
+            cycle_coverage=cycle_coverage_date(date),
             files=FileRunningTotalsEs8(
                 total=file_qs.count(),
                 public=file_qs.filter(public_query).count(),
diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py
index c592f53c268..7827f0ef40c 100644
--- a/osf/metrics/reporters/preprint_count.py
+++ b/osf/metrics/reporters/preprint_count.py
@@ -3,8 +3,9 @@
 
 from osf.metrics import PreprintSummaryReport
 from website import settings
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import PreprintSummaryReportEs8
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -51,7 +52,7 @@ def report(self, date):
             resp = requests.post(f'{settings.SHARE_URL}api/v2/search/creativeworks/_search', json=elastic_query).json()
 
             report_es8 = PreprintSummaryReportEs8(
-                cycle_coverage=f"{date:%Y.%m.%d}",
+                cycle_coverage=cycle_coverage_date(date),
                 provider_key=preprint_provider._id,
                 preprint_count=resp['hits']['total'],
             )
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
index a857ed1b3dc..49850605cd0 100644
--- a/osf/metrics/reporters/private_spam_metrics.py
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -1,8 +1,10 @@
 from osf.metrics.reports import PrivateSpamMetricsReport
 from osf.external.oopspam.client import OOPSpamClient
 from osf.external.askismet.client import AkismetClient
-from ._base import MonthlyReporter
 from osf.metrics.es8_metrics import PrivateSpamMetricsReportEs8
+from osf.metrics.utils import cycle_coverage_yearmonth
+from ._base import MonthlyReporter
+
 
 class PrivateSpamMetricsReporter(MonthlyReporter):
     report_name = 'Private Spam Metrics'
@@ -17,8 +19,7 @@ def report(self):
         reports = []
 
         report_es8 = PrivateSpamMetricsReportEs8(
-            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
-            report_yearmonth=str(self.yearmonth),
+            cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'),
             node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'),
             node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'),
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index 9deb8db6a18..d9b0dd0734c 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -21,7 +21,7 @@
     PreprintView,
 )
 from osf.metrics.reports import PublicItemUsageReport
-from osf.metrics.utils import YearMonth
+from osf.metrics.utils import YearMonth, cycle_coverage_yearmonth
 from osf import models as osfdb
 from website import settings as website_settings
 from ._base import MonthlyReporter
@@ -140,7 +140,7 @@ def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | PublicIte
             raise _SkipItem
         reports = []
         report_es8 = PublicItemUsageReportEs8(
-            cycle_coverage=f"{self.yearmonth.year}.{self.yearmonth.month}",
+            cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             item_osfid=osf_obj._id,
             item_type=[get_item_type(osf_obj)],
             provider_id=[get_provider_id(osf_obj)],
diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py
index b6bc4b6e152..23c74697a54 100644
--- a/osf/metrics/reporters/spam_count.py
+++ b/osf/metrics/reporters/spam_count.py
@@ -1,10 +1,11 @@
 from osf.models import OSFUser
 
 from osf.metrics.reports import SpamSummaryReport
-from ._base import MonthlyReporter
 from osf.models import PreprintLog, NodeLog
 from osf.models.spam import SpamStatus
 from osf.metrics.es8_metrics import SpamSummaryReportEs8
+from osf.metrics.utils import cycle_coverage_yearmonth
+from ._base import MonthlyReporter
 
 class SpamCountReporter(MonthlyReporter):
 
@@ -14,7 +15,7 @@ def report(self, **report_kwargs):
         next_month = self.yearmonth.month_end()
         reports = []
         report_es8 = SpamSummaryReportEs8(
-            cycle_coverage=f"{self.yearmonth:%Y.%m}",
+            cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             node_confirmed_spam=NodeLog.objects.filter(
                 action=NodeLog.CONFIRM_SPAM,
                 created__gt=target_month,
diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index 6656ee1e673..f630f8809ba 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -13,12 +13,13 @@
 from osf.metrics.reports import StorageAddonUsage, RunningTotal, UsageByStorageAddon
 from osf.models import SpamStatus, Tag
 from website import settings
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import (
     StorageAddonUsageEs8,
     UsageByStorageAddon as UsageByStorageAddonEs8,
     RunningTotal as RunningTotalEs8
 )
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -201,7 +202,7 @@ def report(self, date):
             usage_by_addon.append(usage_by_storage_addon)
         reports = []
         report_es8 = StorageAddonUsageEs8(
-            cycle_coverage=f"{date:%Y.%m.%d}",
+            cycle_coverage=cycle_coverage_date(date),
             usage_by_addon=usage_by_addon,
         )
         reports.append(report_es8)
diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py
index 80a9ac1e31e..089fcb63f47 100644
--- a/osf/metrics/reporters/user_count.py
+++ b/osf/metrics/reporters/user_count.py
@@ -1,8 +1,9 @@
 from osf.models import OSFUser
 
 from osf.metrics import UserSummaryReport
-from ._base import DailyReporter
 from osf.metrics.es8_metrics import UserSummaryReportEs8
+from osf.metrics.utils import cycle_coverage_date
+from ._base import DailyReporter
 
 
 class UserCountReporter(DailyReporter):
@@ -10,7 +11,7 @@ class UserCountReporter(DailyReporter):
     def report(self, report_date):
         reports = []
         report_es8 = UserSummaryReportEs8(
-            cycle_coverage=f"{report_date:%Y.%m.%d}",
+            cycle_coverage=cycle_coverage_date(report_date),
             active=OSFUser.objects.filter(is_active=True, date_confirmed__date__lte=report_date).count(),
             deactivated=OSFUser.objects.filter(date_disabled__isnull=False, date_disabled__date__lte=report_date).count(),
             merged=OSFUser.objects.filter(date_registered__date__lte=report_date, merged_by__isnull=False).count(),
diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py
index 973b8bf1ef3..c5d49f293cf 100644
--- a/osf/metrics/utils.py
+++ b/osf/metrics/utils.py
@@ -6,6 +6,28 @@
 from hashlib import sha256
 from typing import ClassVar
 
+from elasticsearch_metrics.util.timeparts import format_timeparts
+
+
+def cycle_coverage_date(given_date: datetime.date) -> str:
+    """Format a date (or datetime) as a dot-separated 'year.month.day' string, without zero-padding.
+    >>> cycle_coverage_date(datetime.date(1234, 5, 6))
+    '1234.5.6'
+    >>> cycle_coverage_date(datetime.datetime(7654, 3, 2, 1))
+    '7654.3.2'
+    """
+    return format_timeparts(given_date, 3)
+
+
+def cycle_coverage_yearmonth(given_ym: YearMonth | datetime.date) -> str:
+    """Format the `.year` and `.month` of a YearMonth (or date) as 'year.month', without zero-padding.
+    >>> cycle_coverage_yearmonth(YearMonth(2222, 33))
+    '2222.33'
+    >>> cycle_coverage_yearmonth(datetime.date(1234, 5, 6))
+    '1234.5'
+    """
+    return format_timeparts((given_ym.year, given_ym.month), 2)
+
 
 def stable_key(*key_parts):
     """hash function for use in osf.metrics
diff --git a/osf_tests/metrics/test_utils.py b/osf_tests/metrics/test_utils.py
index a9d312f2331..47f16be6404 100644
--- a/osf_tests/metrics/test_utils.py
+++ b/osf_tests/metrics/test_utils.py
@@ -1,15 +1,20 @@
-from datetime import date
+import datetime
 
 import pytest
 
-from osf.metrics.utils import stable_key
+from osf.metrics.utils import (
+    stable_key,
+    cycle_coverage_date,
+    cycle_coverage_yearmonth,
+    YearMonth,
+)
 
 
 class TestStableKey:
     @pytest.mark.parametrize('args, expected_key', [
         (['foo'], '2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae'),
-        ([date(1953, 7, 2)], '3943be98daa91031ee7d0e0765472ce1b4a50a21f8c6dcd31047d530a50ada93'),
-        (['floo', 'blar', date(3049, 2, 2)], '853cef24d58fa8cd69b20d7dfbcdbd33f20ccda1a14f57e25e43c2533504b64f'),
+        ([datetime.date(1953, 7, 2)], '3943be98daa91031ee7d0e0765472ce1b4a50a21f8c6dcd31047d530a50ada93'),
+        (['floo', 'blar', datetime.date(3049, 2, 2)], '853cef24d58fa8cd69b20d7dfbcdbd33f20ccda1a14f57e25e43c2533504b64f'),
         ([1, 2, 7.3], '6ab892f8109fd23b03ab24aebc4e343ed2a058d9a72f750bf90ba051627d233e'),
     ])
     def test_successes(self, args, expected_key):
@@ -24,3 +29,13 @@ def test_successes(self, args, expected_key):
     def test_value_errors(self, args):
         with pytest.raises(ValueError):
             stable_key(*args)
+
+
+def test_cycle_coverage_date():
+    assert cycle_coverage_date(datetime.date(1234, 5, 6)) == '1234.5.6'
+    assert cycle_coverage_date(datetime.datetime(7654, 3, 2, 1)) == '7654.3.2'
+
+
+def test_cycle_coverage_yearmonth():
+    assert cycle_coverage_yearmonth(YearMonth(2222, 33)) == '2222.33'
+    assert cycle_coverage_yearmonth(datetime.date(1234, 5, 6)) == '1234.5'

From c4fa5f7928b1c3537db62e9a4d07e8a664047378 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Mon, 27 Apr 2026 13:08:49 -0400
Subject: [PATCH 079/100] fix: counted-usage with session-hour

---
 api/metrics/serializers.py              | 12 ++++++++----
 api/metrics/views.py                    | 11 ++++++++++-
 api_tests/metrics/test_counted_usage.py | 10 +++++-----
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 79c0f9ecc6d..1e867b43981 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -46,7 +46,7 @@ class PageviewInfoSerializer(ser.Serializer):
 
 
 class CountedAuthUsageSerializer(ser.Serializer):
-    item_guid = ser.CharField(max_length=255, required=False)
+    item_guid = ser.CharField(max_length=255, required=True)
     client_session_id = ser.CharField(max_length=255, required=False)
     provider_id = ser.CharField(max_length=255, required=False)
 
@@ -73,12 +73,16 @@ def create(self, validated_data):
             pageview_info = PageviewInfo(**pageview_info_data)
             pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
         OsfCountedUsageRecord.record(
-            item_osfid=validated_data.get('item_guid'),
-            client_session_id=validated_data['session_id'],
+            item_osfid=validated_data['item_guid'],
             action_labels=validated_data.get('action_labels'),
+            provider_id=validated_data.get('provider_id'),
             pageview_info=pageview_info_es8,
+            # used to create a COUNTER session-hour id, not stored:
+            client_session_id=validated_data.get('client_session_id'),
+            user_id=self.context.get('user_id'),
+            request_host=self.context.get('request_host'),
+            request_useragent=self.context.get('request_useragent'),
         )
-
         return CountedAuthUsage.record(
             platform_iri=website_settings.DOMAIN,
             provider_id=validated_data.get('provider_id'),
diff --git a/api/metrics/views.py b/api/metrics/views.py
index 69c44027ec9..508f649ee13 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -394,7 +394,14 @@ class CountedAuthUsageView(JSONAPIBaseView):
     serializer_class = CountedAuthUsageSerializer
 
     def post(self, request, *args, **kwargs):
-        serializer = self.serializer_class(data=request.data)
+        serializer = self.serializer_class(
+            data=request.data,
+            context={
+                'user_id': request.user._id if request.user.is_authenticated else None,
+                'request_host': request.get_host(),
+                'request_useragent': request.META.get('HTTP_USER_AGENT', ''),
+            },
+        )
         serializer.is_valid(raise_exception=True)
         session_id, user_is_authenticated = self._get_session_id(
             request,
@@ -404,6 +411,8 @@ def post(self, request, *args, **kwargs):
         return HttpResponse(status=201)
 
     def _get_session_id(self, request, client_session_id=None):
+        # NOTE: to remove after osfmetrics 6to8 migration -- logic moved to djelme
+
         # get a session id as described in the COUNTER code of practice:
         # https://cop5.projectcounter.org/en/5.0.2/07-processing/03-counting-unique-items.html
         # -- different from the "login session" tracked by `osf.models.Session` (which
diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py
index 40c9f6e6aac..04c15a5b516 100644
--- a/api_tests/metrics/test_counted_usage.py
+++ b/api_tests/metrics/test_counted_usage.py
@@ -287,7 +287,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
             item_guid=preprint._id,
             action_labels=['view', 'web'],
         )
-        resp = app.post_json_api(COUNTED_USAGE_URL, payload)
+        resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
         assert resp.status_code == 201
         assert_saved_with(
             mock_save,
@@ -306,7 +306,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
             item_guid=preprint.primary_file.get_guid(create=True)._id,
             action_labels=['view', 'web'],
         )
-        resp = app.post_json_api(COUNTED_USAGE_URL, payload)
+        resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
         assert resp.status_code == 201
         assert_saved_with(
             mock_save,
@@ -325,7 +325,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
             item_guid=child_reg_file_guid,
             action_labels=['view', 'web'],
         )
-        resp = app.post_json_api(COUNTED_USAGE_URL, payload)
+        resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
         assert resp.status_code == 201
         assert_saved_with(
             mock_save,
@@ -348,7 +348,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
             item_guid=child_reg._id,
             action_labels=['view', 'web'],
         )
-        resp = app.post_json_api(COUNTED_USAGE_URL, payload)
+        resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
         assert resp.status_code == 201
         assert_saved_with(
             mock_save,
@@ -370,7 +370,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
             item_guid=parent_reg._id,
             action_labels=['view', 'web'],
         )
-        resp = app.post_json_api(COUNTED_USAGE_URL, payload)
+        resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
         assert resp.status_code == 201
         assert_saved_with(
             mock_save,

From f88ed2f64eb56e217ea704b9377aaab959d2d595 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Mon, 27 Apr 2026 17:42:52 -0400
Subject: [PATCH 080/100] fix(staging): elastic hostname with ip url

---
 api/base/settings/defaults.py | 1 +
 website/settings/defaults.py  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py
index c6d604745a7..ac9a9739f1b 100644
--- a/api/base/settings/defaults.py
+++ b/api/base/settings/defaults.py
@@ -337,6 +337,7 @@
                 if osf_settings.ELASTIC8_SECRET is not None
                 else None
             ),
+            'ssl_assert_hostname': osf_settings.ELASTIC8_ASSERT_HOSTNAME,
             # djelme-specific kwargs
             'djelme_default_index_name_prefix': osf_settings.SHARE_PROVIDER_PREPEND,
         },
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index ffa15bcc6e5..ca95f9f0dab 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -109,6 +109,7 @@ def parent_dir(path):
 ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201')
 ELASTIC8_URI = os.environ.get('ELASTIC8_URI')
 ELASTIC8_CERT_PATH = os.environ.get('ELASTIC8_CERT_PATH')
+ELASTIC8_ASSERT_HOSTNAME = os.environ.get('ELASTIC8_ASSERT_HOSTNAME')
 ELASTIC8_USERNAME = os.environ.get('ELASTIC8_USERNAME', 'elastic')
 ELASTIC8_SECRET = os.environ.get('ELASTIC8_SECRET')
 ELASTIC_TIMEOUT = 10

From 43587565297bba73eba2003fb76044702fb264e8 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Mon, 27 Apr 2026 18:08:05 -0400
Subject: [PATCH 081/100] fix: s/check_metrics/djelme_backend_check

---
 osf/management/commands/check_deploy_ready.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/management/commands/check_deploy_ready.py b/osf/management/commands/check_deploy_ready.py
index 39fc98e9bee..527c900f273 100644
--- a/osf/management/commands/check_deploy_ready.py
+++ b/osf/management/commands/check_deploy_ready.py
@@ -18,6 +18,6 @@ def handle(self, *args, **options):
         ]
 
         if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
-            CHECKS.append(['check_metrics'])
+            CHECKS.append(['djelme_backend_check'])
         for check in CHECKS:
             call_command(*check)

From 76138f16a30faf5a4d93c3e34625e75829e35ef5 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 08:43:20 -0400
Subject: [PATCH 082/100] fix: /_/metrics/raw- passthru

---
 api/metrics/views.py | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/api/metrics/views.py b/api/metrics/views.py
index 41b5245fef1..bd53bee296e 100644
--- a/api/metrics/views.py
+++ b/api/metrics/views.py
@@ -228,25 +228,43 @@ def delete(self, request, *args, **kwargs):
 
     @require_switch(ENABLE_RAW_METRICS)
     def get(self, request, *args, djelme_backend_name, url_path, **kwargs):
-        connection = self._get_es_connection(djelme_backend_name)
-        _response = connection.transport.perform_request('GET', f'/{url_path}')
-        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
+        _response_body = self._do_es_request(
+            djelme_backend_name,
+            method='GET',
+            path=url_path,
+            qp=request.GET,
+        )
+        return JsonResponse(_response_body)
 
     @require_switch(ENABLE_RAW_METRICS)
     def post(self, request, *args, djelme_backend_name, url_path, **kwargs):
-        connection = self._get_es_connection(djelme_backend_name)
-        body = json.loads(request.body)
-        _response = connection.transport.perform_request('POST', f'/{url_path}', body=body)
-        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
+        _response_body = self._do_es_request(
+            djelme_backend_name,
+            method='POST',
+            path=url_path,
+            qp=request.GET,
+            body=json.loads(request.body),
+        )
+        return JsonResponse(_response_body)
 
     @require_switch(ENABLE_RAW_METRICS)
     def put(self, request, *args, djelme_backend_name, url_path, **kwargs):
-        connection = self._get_es_connection(djelme_backend_name)
-        body = json.loads(request.body)
-        _response = connection.transport.perform_request('PUT', f'/{url_path}', body=body)
-        return JsonResponse(_response if isinstance(_response, dict) else _response.body)
+        _response_body = self._do_es_request(
+            djelme_backend_name,
+            method='PUT',
+            path=url_path,
+            qp=request.GET,
+            body=json.loads(request.body),
+        )
+        return JsonResponse(_response_body)
+
+    def _do_es_request(self, djelme_backend_name, method, path, qp, body=None):
+        _client = self._get_es_client(djelme_backend_name)
+        _perform_fn = getattr(_client, 'perform_request', None) or _client.transport.perform_request
+        _response = _perform_fn(method, f'/{path}', params=qp.dict(), body=body)
+        return _response if isinstance(_response, dict) else _response.body
 
-    def _get_es_connection(self, djelme_backend_name):
+    def _get_es_client(self, djelme_backend_name):
         try:
             _backend = djelme_registry.get_backend(djelme_backend_name)
         except LookupError:

From d9e007624e9526d1e234d5e08261a7c61a9284b1 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 11:19:36 -0400
Subject: [PATCH 083/100] fix: s/short_name/addon_shortname

---
 osf/metrics/reporters/storage_addon_usage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index f630f8809ba..d17528f98fb 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -169,7 +169,7 @@ def report(self, date):
             )
             usage_by_addon_es8.append(usage_by_storage_addon_es_8)
             usage_by_storage_addon = UsageByStorageAddon(
-                addon_shortname=usage_by_storage_addon_es_8.short_name,
+                addon_shortname=usage_by_storage_addon_es_8.addon_shortname,
                 enabled_usersettings=RunningTotal(
                     total=usage_by_storage_addon_es_8.enabled_usersettings.total,
                     total_daily=usage_by_storage_addon_es_8.enabled_usersettings.total_daily,

From cfa5085266ad84d2f8c2172fe2239e268482ef40 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 11:19:50 -0400
Subject: [PATCH 084/100] fix: make unused fields optional

---
 osf/metrics/es8_metrics.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 2ab6ec022ec..1ec0f504cbf 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -279,11 +279,11 @@ class FileRunningTotals(esdsl.InnerDoc):
 
 class NodeRunningTotals(esdsl.InnerDoc):
     total: int
-    total_excluding_spam: int
+    total_excluding_spam: int | None
     public: int
     private: int
     total_daily: int
-    total_daily_excluding_spam: int
+    total_daily_excluding_spam: int | None
     public_daily: int
     private_daily: int
 
@@ -293,12 +293,12 @@ class RegistrationRunningTotals(esdsl.InnerDoc):
     public: int
     embargoed: int
     embargoed_v2: int
-    withdrawn: int
+    withdrawn: int | None
     total_daily: int
     public_daily: int
     embargoed_daily: int
     embargoed_v2_daily: int
-    withdrawn_daily: int
+    withdrawn_daily: int | None
 
 
 class UsageByStorageAddon(esdsl.InnerDoc):

From 80d2f0db459a72983ca0f89bdbf7bcf9d162f55c Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 11:59:16 -0400
Subject: [PATCH 085/100] fix: item_type list in osfmetrics 6to8

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 392ebb330b0..489f84967dc 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -506,7 +506,12 @@ def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str)
     return _view_count
 
 
-def _convert_item_type(osf_model_name: str | None, has_surrounding_items: bool):
+def _convert_item_type(osf_model_name: str | list[str] | None, has_surrounding_items: bool):
+    if isinstance(osf_model_name, list):
+        return [
+            _convert_item_type(_model_name, has_surrounding_items)
+            for _model_name in osf_model_name
+        ]
     if osf_model_name:
         try:
             return osfmap_type_from_model(

From aca7447c7d18a6ade51b9339cd2144058098f28f Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 11:59:39 -0400
Subject: [PATCH 086/100] fix: es8 Field.serialize with skip_empty

---
 osf/metrics/es8_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 1ec0f504cbf..af2e3ccb8ff 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -37,7 +37,7 @@ def deserialize(self, data):
         except ValueError:
             raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth')
 
-    def serialize(self, data):
+    def serialize(self, data, skip_empty=True):
         if isinstance(data, str):
             return data
         elif isinstance(data, YearMonth):

From f0ffadd176dbf3adabdaffd8c38e88a5666c1dff Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 14:05:58 -0400
Subject: [PATCH 087/100] fix: some optional osfmetrics report fields

---
 osf/metrics/es8_metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index af2e3ccb8ff..7c90749f80c 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -265,7 +265,7 @@ class Meta:
 
 class RunningTotal(esdsl.InnerDoc):
     total: int
-    total_daily: int
+    total_daily: int | None
 
 
 class FileRunningTotals(esdsl.InnerDoc):
@@ -443,7 +443,7 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
     # user info:
     user_id: str
     user_name: str
-    department_name: str
+    department_name: str | None
     month_last_login = YearmonthField()
     month_last_active = YearmonthField()
     account_creation_date = YearmonthField()

From 5b06f6ad5f840d87d5f9c37e375cc625943180ad Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 14:06:28 -0400
Subject: [PATCH 088/100] fix: 0 session counts

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 489f84967dc..62450d081eb 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -399,12 +399,12 @@ def _convert_public_usage_report(
         ),
         provider_id=source.get('provider_id'),
         platform_iri=source.get('platform_iri') or website_settings.DOMAIN,
-        view_count=source.get('view_count'),
-        view_session_count=source.get('view_session_count'),
+        view_count=source.get('view_count', 0),
+        view_session_count=source.get('view_session_count', 0),
         cumulative_view_count=_c_views,
         cumulative_view_session_count=_c_view_sess,
-        download_count=source.get('download_count'),
-        download_session_count=source.get('download_session_count'),
+        download_count=source.get('download_count', 0),
+        download_session_count=source.get('download_session_count', 0),
         cumulative_download_count=_c_downloads,
         cumulative_download_session_count=_c_download_sess,
     )

From fd59272dc86c8a31c92bd0b41413a878c37d14e4 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 14:32:42 -0400
Subject: [PATCH 089/100] avoid duplicate moderation events

---
 osf/metrics/es8_metrics.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 7c90749f80c..e493d59fc54 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -247,6 +247,10 @@ def _get_unique_together_values(self):
 
 
 class RegistriesModerationMetricsEs8(djelme.EventRecord):
+    UNIQUE_TOGETHER_FIELDS = (
+        'timestamp', 'registration_id', 'trigger', 'from_state', 'to_state', 'user_id'
+    )
+
     registration_id: str
     provider_id: str
     trigger: str

From 8ebd570af7c53f3c3faabffb27efe5fd832d8622 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Tue, 28 Apr 2026 15:06:12 -0400
Subject: [PATCH 090/100] fix(6to8): more relevant usage event count

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 62450d081eb..e41951a9232 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -670,7 +670,7 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
             _es6_count = (
                 _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
             )
-            _es8_count = es8_metrics.OsfCountedUsageRecord.search().count()
+            _es8_count = es8_metrics.OsfCountedUsageRecord.search().filter(_range_q).count()
             self._write_tabbed('es6', PreprintView, _es6_pview_count)
             self._write_tabbed('es6', PreprintDownload, _es6_pdownload_count)
             self._write_tabbed('es6', CountedUsageEs6, _es6_usage_event_count)

From 9bcc951e4f9ca3501edcd8320711c5afb6a20d77 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 08:18:00 -0400
Subject: [PATCH 091/100] fix: mirror reg mod event to es8

---
 osf/models/registrations.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/osf/models/registrations.py b/osf/models/registrations.py
index e9114355649..4afc929ba79 100644
--- a/osf/models/registrations.py
+++ b/osf/models/registrations.py
@@ -25,6 +25,7 @@
 from osf.exceptions import NodeStateError, DraftRegistrationStateError
 from osf.external.internet_archive.tasks import archive_to_ia, update_ia_metadata
 from osf.metrics import RegistriesModerationMetrics
+from osf.metrics.es8_metrics import RegistriesModerationMetricsEs8
 from osf.models.notification_type import NotificationTypeEnum
 from .action import RegistrationAction
 from .archive import ArchiveJob
@@ -786,6 +787,15 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment
         action.save()
         if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
             RegistriesModerationMetrics.record_transitions(action)
+            RegistriesModerationMetricsEs8.record(
+                registration_id=action.target._id,
+                provider_id=action.target.provider._id,
+                from_state=action.from_state,
+                to_state=action.to_state,
+                trigger=action.trigger,
+                user_id=action.creator._id,
+                comment=action.comment,
+            )
 
         moderation_notifications = {
             RegistrationModerationTriggers.SUBMIT: notify.notify_submit,

From 3c24a6bd07a383d2b4dd175c1a6fb626de845dac Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 08:30:23 -0400
Subject: [PATCH 092/100] fix: autofill referent

---
 addons/base/views.py       | 6 ++++--
 osf/metrics/es8_metrics.py | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/addons/base/views.py b/addons/base/views.py
index e10b9bfee60..285199328e5 100644
--- a/addons/base/views.py
+++ b/addons/base/views.py
@@ -700,7 +700,8 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
                     OsfCountedUsageRecord.ActionLabel.WEB.value,
                 ],
                 # HACK: we don't have the user request, so fabricate a one-off session id
-                # (this means no double-click filtering and inflated "unique" view counts)
+                # (this means no double-click filtering for anonymous users (same as before)
+                # and potentially inflated "unique" sessionhour view counts)
                 client_session_id=str(uuid.uuid4()),
             )
         except es_exceptions.ConnectionError:
@@ -737,7 +738,8 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
                     OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value,
                 ],
                 # HACK: we don't have the user request, so fabricate a one-off session id
-                # (this means no double-click filtering and inflated "unique" download counts)
+                # (this means no double-click filtering for anonymous users (same as before)
+                # and potentially inflated "unique" sessionhour download counts)
                 client_session_id=str(uuid.uuid4()),
             )
         except es_exceptions.ConnectionError:
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index e493d59fc54..bf5f2fc020f 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -128,7 +128,8 @@ def record(cls, **kwargs):
     @functools.cached_property
     def _osfid_referent(self):
         # for use by autofill methods, if needed
-        return osfdb.Guid.load(self.item_osfid)
+        _osfguid = osfdb.Guid.load(self.item_osfid)
+        return _osfguid.referent if _osfguid else None
 
     def clean(self):
         super().clean()

From 4be53ddd6f16df121ac05a96c05b55b9c36bfe4d Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 09:04:49 -0400
Subject: [PATCH 093/100] fix? double-counted usage

---
 osf/metrics/es8_metrics.py | 76 ++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 32 deletions(-)

diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index bf5f2fc020f..6bcf4f0d4f6 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -90,6 +90,17 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
     https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
     https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
     '''
+    UNIQUE_TOGETHER_FIELDS = (
+        'platform_iri',
+        'sessionhour_id',
+        'action_labels',
+        # include some non-field properties for more complex logic to
+        # slightly better approximate `counter:Double-Click Filtering`
+        # and allow for multiple pages describing the same item_iri
+        '_page_url_or_osfid',  # non-field property
+        '_timestamp_date',  # non-field property
+        '_timestamp_30sec_window',  # non-field property
+    )
 
     # inherited fields:
     #     timestamp: datetime.datetime
@@ -125,6 +136,34 @@ def record(cls, **kwargs):
             kwargs['user_is_authenticated'] = bool(kwargs.get('user_id'))
         return super().record(**kwargs)
 
+    @property
+    def _page_url_or_osfid(self):
+        # for UNIQUE_TOGETHER_FIELDS
+        return (
+            self.pageview_info.page_url
+            if self.pageview_info is not None and self.pageview_info.page_url is not None
+            else self.item_osfid
+        )
+
+    @property
+    def _timestamp_date(self):
+        # for UNIQUE_TOGETHER_FIELDS
+        return self.timestamp.date()
+
+    @property
+    def _timestamp_30sec_window(self):
+        # for UNIQUE_TOGETHER_FIELDS
+        # slice the day into an array of 30-second windows,
+        # find this timestamp's windowslice index
+        day_start = datetime.datetime(
+            self.timestamp.year,
+            self.timestamp.month,
+            self.timestamp.day,
+            tzinfo=self.timestamp.tzinfo,
+        )
+        time_in_seconds = (self.timestamp - day_start).total_seconds()
+        return int(time_in_seconds / 30)  # 30-second windows
+
     @functools.cached_property
     def _osfid_referent(self):
         # for use by autofill methods, if needed
@@ -132,7 +171,6 @@ def _osfid_referent(self):
         return _osfguid.referent if _osfguid else None
 
     def clean(self):
-        super().clean()
         self._autofill_platform_iri()
         self._autofill_item_iri_and_osfid()
         self._autofill_item_public()
@@ -141,6 +179,8 @@ def clean(self):
         self._autofill_within_iris()
         self._autofill_pageview()
         self._autofill_database_iri()
+        self._clean_action_labels()
+        super().clean()
 
     def _autofill_platform_iri(self):
         if self.platform_iri is None:
@@ -214,37 +254,9 @@ def _autofill_database_iri(self):
             else:
                 self.database_iri = _provider.get_semantic_iri()
 
-    def _get_unique_together_values(self):
-        """get "unique together" values for "ON CONFLICT UPDATE" behavior
-
-        override djelme.BaseDjelmeRecord._get_unique_together_values
-        for more complex logic than UNIQUE_TOGETHER_FIELDS
-        to slightly better approximate `counter:Double-Click Filtering`
-        """
-        # note: copied from osf.metrics.counted_usage._fill_document_id
-        target_identifier = (
-            self.pageview_info.page_url
-            if self.pageview_info is not None and self.pageview_info.page_url is not None
-            else self.item_osfid
-        )
-        # slice the day into an array of 30-second windows,
-        # find this timestamp's windowslice index
-        day_start = datetime.datetime(
-            self.timestamp.year,
-            self.timestamp.month,
-            self.timestamp.day,
-            tzinfo=self.timestamp.tzinfo,
-        )
-        time_in_seconds = (self.timestamp - day_start).total_seconds()
-        time_window = int(time_in_seconds / 30)  # 30-second windows
-        return (  # unique-together values:
-            self.platform_iri,
-            target_identifier,
-            self.sessionhour_id,
-            self.timestamp.date(),
-            time_window,
-            ','.join(sorted(self.action_labels)),
-        )
+    def _clean_action_labels(self):
+        if self.action_labels:
+            self.action_labels = sorted(self.action_labels)
 
 
 class RegistriesModerationMetricsEs8(djelme.EventRecord):

From 88a939a1f25a94c767e910930e1f8e6f3fcb857d Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 09:30:15 -0400
Subject: [PATCH 094/100] renames for consistency and clarity

---
 addons/base/views.py                          | 12 +--
 api/metrics/serializers.py                    |  4 +-
 .../commands/migrate_osfmetrics_6to8.py       | 74 +++++++++----------
 osf/metrics/es8_metrics.py                    | 58 +++++++--------
 osf/metrics/reporters/download_count.py       |  4 +-
 osf/metrics/reporters/institution_summary.py  |  4 +-
 .../reporters/institution_summary_monthly.py  |  4 +-
 osf/metrics/reporters/institutional_users.py  |  6 +-
 osf/metrics/reporters/new_user_domain.py      |  4 +-
 osf/metrics/reporters/node_count.py           |  4 +-
 .../reporters/osfstorage_file_count.py        |  4 +-
 osf/metrics/reporters/preprint_count.py       |  4 +-
 osf/metrics/reporters/private_spam_metrics.py |  4 +-
 osf/metrics/reporters/public_item_usage.py    |  6 +-
 osf/metrics/reporters/spam_count.py           |  4 +-
 osf/metrics/reporters/storage_addon_usage.py  |  4 +-
 osf/metrics/reporters/user_count.py           |  4 +-
 osf/models/registrations.py                   |  4 +-
 osf_tests/metrics/test_es8_metrics.py         | 16 ++--
 19 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/addons/base/views.py b/addons/base/views.py
index 285199328e5..12b78fb9957 100644
--- a/addons/base/views.py
+++ b/addons/base/views.py
@@ -34,7 +34,7 @@
 from framework.flask import redirect
 from framework.sentry import log_exception
 from framework.transactions.handlers import no_auto_transaction
-from osf.metrics.es8_metrics import OsfCountedUsageRecord
+from osf.metrics.es8_metrics import OsfCountedUsageEvent
 from website import settings
 from addons.base import signals as file_signals
 from addons.base.utils import format_last_known_metadata, get_mfr_url
@@ -692,12 +692,12 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
                 version=fileversion.identifier,
                 path=file_node.path,
             )
-            OsfCountedUsageRecord.record(
+            OsfCountedUsageEvent.record(
                 user_id=getattr(user, '_id', None),
                 item_osfid=resource._id,
                 action_labels=[
-                    OsfCountedUsageRecord.ActionLabel.VIEW.value,
-                    OsfCountedUsageRecord.ActionLabel.WEB.value,
+                    OsfCountedUsageEvent.ActionLabel.VIEW.value,
+                    OsfCountedUsageEvent.ActionLabel.WEB.value,
                 ],
                 # HACK: we don't have the user request, so fabricate a one-off session id
                 # (this means no double-click filtering for anonymous users (same as before)
@@ -731,11 +731,11 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
                 version=fileversion.identifier,
                 path=file_node.path,
             )
-            OsfCountedUsageRecord.record(
+            OsfCountedUsageEvent.record(
                 user_id=getattr(user, '_id', None),
                 item_osfid=resource._id,
                 action_labels=[
-                    OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value,
+                    OsfCountedUsageEvent.ActionLabel.DOWNLOAD.value,
                 ],
                 # HACK: we don't have the user request, so fabricate a one-off session id
                 # (this means no double-click filtering for anonymous users (same as before)
diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py
index 1e867b43981..9e3f61f5b50 100644
--- a/api/metrics/serializers.py
+++ b/api/metrics/serializers.py
@@ -7,7 +7,7 @@
 from api.base.utils import absolute_reverse
 from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
 from osf.metrics.es8_metrics import (
-    OsfCountedUsageRecord,
+    OsfCountedUsageEvent,
     PageviewInfo as PageviewInfoEs8,
 )
 from website import settings as website_settings
@@ -72,7 +72,7 @@ def create(self, validated_data):
         if pageview_info_data := validated_data.get('pageview_info'):
             pageview_info = PageviewInfo(**pageview_info_data)
             pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
-        OsfCountedUsageRecord.record(
+        OsfCountedUsageEvent.record(
             item_osfid=validated_data['item_guid'],
             action_labels=validated_data.get('action_labels'),
             provider_id=validated_data.get('provider_id'),
diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index e41951a9232..a54cfca0b61 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -44,20 +44,20 @@
 
 _UNCHANGED_RECORDTYPES = {
     # reports
-    es6_reports.StorageAddonUsage: es8_metrics.StorageAddonUsageEs8,
-    es6_reports.DownloadCountReport: es8_metrics.DownloadCountReportEs8,
-    es6_reports.InstitutionSummaryReport: es8_metrics.InstitutionSummaryReportEs8,
-    es6_reports.NewUserDomainReport: es8_metrics.NewUserDomainReportEs8,
-    es6_reports.NodeSummaryReport: es8_metrics.NodeSummaryReportEs8,
-    es6_reports.OsfstorageFileCountReport: es8_metrics.OsfstorageFileCountReportEs8,
-    es6_reports.PreprintSummaryReport: es8_metrics.PreprintSummaryReportEs8,
-    es6_reports.UserSummaryReport: es8_metrics.UserSummaryReportEs8,
-    es6_reports.SpamSummaryReport: es8_metrics.SpamSummaryReportEs8,
-    es6_reports.InstitutionalUserReport: es8_metrics.InstitutionalUserReportEs8,
-    es6_reports.InstitutionMonthlySummaryReport: es8_metrics.InstitutionMonthlySummaryReportEs8,
-    es6_reports.PrivateSpamMetricsReport: es8_metrics.PrivateSpamMetricsReportEs8,
+    es6_reports.StorageAddonUsage: es8_metrics.DailyStorageAddonUsageReportEs8,
+    es6_reports.DownloadCountReport: es8_metrics.DailyDownloadCountReportEs8,
+    es6_reports.InstitutionSummaryReport: es8_metrics.DailyInstitutionSummaryReportEs8,
+    es6_reports.NewUserDomainReport: es8_metrics.DailyNewUserDomainReportEs8,
+    es6_reports.NodeSummaryReport: es8_metrics.DailyNodeSummaryReportEs8,
+    es6_reports.OsfstorageFileCountReport: es8_metrics.DailyOsfstorageFileCountReportEs8,
+    es6_reports.PreprintSummaryReport: es8_metrics.DailyPreprintSummaryReportEs8,
+    es6_reports.UserSummaryReport: es8_metrics.DailyUserSummaryReportEs8,
+    es6_reports.SpamSummaryReport: es8_metrics.MonthlySpamSummaryReportEs8,
+    es6_reports.InstitutionalUserReport: es8_metrics.MonthlyInstitutionalUserReportEs8,
+    es6_reports.InstitutionMonthlySummaryReport: es8_metrics.MonthlyInstitutionSummaryReportEs8,
+    es6_reports.PrivateSpamMetricsReport: es8_metrics.MonthlyPrivateSpamMetricsReportEs8,
     # events
-    RegistriesModerationMetrics: es8_metrics.RegistriesModerationMetricsEs8,
+    RegistriesModerationMetrics: es8_metrics.RegistriesModerationEventEs8,
 }
 
 _TASK_KWARGS = dict(
@@ -93,7 +93,7 @@ def migrate_unchanged_recordtype(es6_recordtype_name: str, until_when: str):
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_counted_usages(from_when: str, until_when: str):
-    # CountedAuthUsage => OsfCountedUsageRecord
+    # CountedAuthUsage => OsfCountedUsageEvent
     _each_new = (
         _convert_counted_usage(_hit['_source'])
         for _hit in _es6_scan_range(
@@ -103,12 +103,12 @@ def migrate_counted_usages(from_when: str, until_when: str):
             addl_filter={'exists': {'field': 'item_guid'}},
         )
     )
-    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageEvent, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_views(from_when: str, until_when: str):
-    # PreprintView => OsfCountedUsageRecord
+    # PreprintView => OsfCountedUsageEvent
     _action_labels = ['view', 'web']
     _each_new = (
         _convert_preprint_metric(_hit['_source'], _action_labels)
@@ -116,12 +116,12 @@ def migrate_preprint_views(from_when: str, until_when: str):
             PreprintView, from_when=from_when, until_when=until_when
         )
     )
-    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageEvent, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_preprint_downloads(from_when: str, until_when: str):
-    # PreprintDownload => OsfCountedUsageRecord
+    # PreprintDownload => OsfCountedUsageEvent
     _action_labels = ['download']
     _each_new = (
         _convert_preprint_metric(_hit['_source'], _action_labels)
@@ -129,12 +129,12 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
             PreprintDownload, from_when=from_when, until_when=until_when
         )
     )
-    _es8_bulk_save(es8_metrics.OsfCountedUsageRecord, _each_new)
+    _es8_bulk_save(es8_metrics.OsfCountedUsageEvent, _each_new)
 
 
 @celery_app.task(**_TASK_KWARGS)
 def migrate_usage_reports(osfid: str, until_when: str):
-    # from PublicItemUsageReport to PublicItemUsageReportEs8
+    # from PublicItemUsageReport to MonthlyPublicItemUsageReportEs8
     _osfguid = osfdb.Guid.load(osfid)
     _item_is_component = is_osf_component(_osfguid.referent) if _osfguid else False
 
@@ -157,7 +157,7 @@ def _each_new():
                 )
             )
 
-    _es8_bulk_save(es8_metrics.PublicItemUsageReportEs8, _each_new())
+    _es8_bulk_save(es8_metrics.MonthlyPublicItemUsageReportEs8, _each_new())
 
 
 ###
@@ -233,7 +233,7 @@ def _es6_usage_report_counts() -> tuple[int, int]:
 
 
 def _es8_usage_report_counts() -> tuple[int, int]:
-    _search = es8_metrics.PublicItemUsageReportEs8.search()
+    _search = es8_metrics.MonthlyPublicItemUsageReportEs8.search()
     _search.aggs.metric(
         'agg_item_count',
         'cardinality',
@@ -315,8 +315,8 @@ def _each_kwarg():
     return dict(_each_kwarg())
 
 
-def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
-    return es8_metrics.OsfCountedUsageRecord(
+def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageEvent:
+    return es8_metrics.OsfCountedUsageEvent(
         # fields from djelme.CountedUsageRecord:
         timestamp=source['timestamp'],
         sessionhour_id=source['session_id'],
@@ -329,7 +329,7 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
             osf_iri(_within_osfid)
             for _within_osfid in source.get('surrounding_guids', ())
         ],
-        # fields from OsfCountedUsageRecord:
+        # fields from OsfCountedUsageEvent:
         item_osfid=source['item_guid'],
         item_type=_convert_item_type(
             source.get('item_type'),
@@ -345,8 +345,8 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageRecord:
 
 def _convert_preprint_metric(
     source: dict, action_labels: list[str]
-) -> es8_metrics.OsfCountedUsageRecord:
-    return es8_metrics.OsfCountedUsageRecord.record(
+) -> es8_metrics.OsfCountedUsageEvent:
+    return es8_metrics.OsfCountedUsageEvent.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
         timestamp=datetime.datetime.fromisoformat(source['timestamp']),
@@ -358,7 +358,7 @@ def _convert_preprint_metric(
             provider_id=source.get('provider_id'),
             osf_model_name='preprint',
         ),
-        # fields from OsfCountedUsageRecord:
+        # fields from OsfCountedUsageEvent:
         item_osfid=source['preprint_id'],
         item_type=OSF.Preprint,
         item_public=True,
@@ -370,9 +370,9 @@ def _convert_preprint_metric(
 
 def _convert_public_usage_report(
     source: dict,
-    prior_report: es8_metrics.PublicItemUsageReportEs8 | None,
+    prior_report: es8_metrics.MonthlyPublicItemUsageReportEs8 | None,
     item_is_component: bool,
-) -> es8_metrics.PublicItemUsageReportEs8:
+) -> es8_metrics.MonthlyPublicItemUsageReportEs8:
     if prior_report is None:
         _c_views, _c_view_sess, _c_downloads, _c_download_sess = _get_cumulative_usage(
             osfid=source['item_osfid'],
@@ -390,7 +390,7 @@ def _convert_public_usage_report(
         _c_download_sess = prior_report.cumulative_download_session_count + source.get(
             'download_session_count', 0
         )
-    return es8_metrics.PublicItemUsageReportEs8(
+    return es8_metrics.MonthlyPublicItemUsageReportEs8(
         cycle_coverage=_semverish_from_yearmonth(source['report_yearmonth']),
         item_osfid=source['item_osfid'],
         item_type=_convert_item_type(
@@ -670,7 +670,7 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
             _es6_count = (
                 _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
             )
-            _es8_count = es8_metrics.OsfCountedUsageRecord.search().filter(_range_q).count()
+            _es8_count = es8_metrics.OsfCountedUsageEvent.search().filter(_range_q).count()
             self._write_tabbed('es6', PreprintView, _es6_pview_count)
             self._write_tabbed('es6', PreprintDownload, _es6_pdownload_count)
             self._write_tabbed('es6', CountedUsageEs6, _es6_usage_event_count)
@@ -679,13 +679,13 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
             )
             self._write_tabbed(
                 'es8',
-                es8_metrics.OsfCountedUsageRecord,
+                es8_metrics.OsfCountedUsageEvent,
                 _es8_count,
                 style=self._eq_style(_es8_count, _es6_count),
             )
         if start:  # schedule (per-day?) tasks (if --start)
             self.stdout.write(
-                f'starting usages => {es8_metrics.OsfCountedUsageRecord.__name__}'
+                f'starting usages => {es8_metrics.OsfCountedUsageEvent.__name__}'
             )
             for _from_date, _until_date in _date_range(_range_start, _range_end):
                 _from_str = _from_date.isoformat()
@@ -702,7 +702,7 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
             self._write_tabbed('es6', es6_reports.PublicItemUsageReport, _es6_count)
             self._write_tabbed(
                 'es8',
-                es8_metrics.PublicItemUsageReportEs8,
+                es8_metrics.MonthlyPublicItemUsageReportEs8,
                 _es8_count,
                 style=self._eq_style(_es8_count, _es6_count),
             )
@@ -714,7 +714,7 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
             )
             self._write_tabbed(
                 'es8',
-                es8_metrics.PublicItemUsageReportEs8,
+                es8_metrics.MonthlyPublicItemUsageReportEs8,
                 '(items)',
                 _es8_item_count,
                 style=self._eq_style(_es8_item_count, _es6_item_count),
@@ -723,7 +723,7 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
         # each item-task iter thru reports oldest to newest, adding cumulative counts
         if start:
             self.stdout.write(
-                f'starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.PublicItemUsageReportEs8.__name__}'
+                f'starting per-item {es6_reports.PublicItemUsageReport.__name__} => {es8_metrics.MonthlyPublicItemUsageReportEs8.__name__}'
             )
             for _osfid in _each_usage_report_osfid(
                 until_when=self._migration_started_at
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index 6bcf4f0d4f6..ad2ba5d9018 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -84,7 +84,7 @@ class PageviewInfo(esdsl.InnerDoc):
 ###
 # Event records
 
-class OsfCountedUsageRecord(djelme.CountedUsageRecord):
+class OsfCountedUsageEvent(djelme.CountedUsageRecord):
     '''
     Aim to support a COUNTER-style reporting api
     https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
@@ -259,7 +259,7 @@ def _clean_action_labels(self):
             self.action_labels = sorted(self.action_labels)
 
 
-class RegistriesModerationMetricsEs8(djelme.EventRecord):
+class RegistriesModerationEventEs8(djelme.EventRecord):
     UNIQUE_TOGETHER_FIELDS = (
         'timestamp', 'registration_id', 'trigger', 'from_state', 'to_state', 'user_id'
     )
@@ -273,7 +273,7 @@ class RegistriesModerationMetricsEs8(djelme.EventRecord):
     comment: str | None
 
     class Meta:
-        timeseries_recordtype_name = 'RegistriesModerationMetrics'
+        timeseries_recordtype_name = 'RegistriesModerationEvent'
         timeseries_index_timedepth = MONTHLY
 
 
@@ -333,27 +333,27 @@ class UsageByStorageAddon(esdsl.InnerDoc):
 # Cyclic reports
 
 
-class StorageAddonUsageEs8(djelme.CyclicRecord):
+class DailyStorageAddonUsageReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     usage_by_addon: list[UsageByStorageAddon]
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'StorageAddonUsage'
+        timeseries_recordtype_name = 'DailyStorageAddonUsageReport'
 
 
-class DownloadCountReportEs8(djelme.CyclicRecord):
+class DailyDownloadCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     daily_file_downloads: int
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'DownloadCountReport'
+        timeseries_recordtype_name = 'DailyDownloadCountReport'
 
 
-class InstitutionSummaryReportEs8(djelme.CyclicRecord):
+class DailyInstitutionSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',)
 
@@ -367,10 +367,10 @@ class InstitutionSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'InstitutionSummaryReport'
+        timeseries_recordtype_name = 'DailyInstitutionSummaryReport'
 
 
-class NewUserDomainReportEs8(djelme.CyclicRecord):
+class DailyNewUserDomainReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',)
 
@@ -379,10 +379,10 @@ class NewUserDomainReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'NewUserDomainReport'
+        timeseries_recordtype_name = 'DailyNewUserDomainReport'
 
 
-class NodeSummaryReportEs8(djelme.CyclicRecord):
+class DailyNodeSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     nodes: NodeRunningTotals
@@ -392,20 +392,20 @@ class NodeSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'NodeSummaryReport'
+        timeseries_recordtype_name = 'DailyNodeSummaryReport'
 
 
-class OsfstorageFileCountReportEs8(djelme.CyclicRecord):
+class DailyOsfstorageFileCountReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     files: FileRunningTotals
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'OsfstorageFileCountReport'
+        timeseries_recordtype_name = 'DailyOsfstorageFileCountReport'
 
 
-class PreprintSummaryReportEs8(djelme.CyclicRecord):
+class DailyPreprintSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',)
@@ -414,10 +414,10 @@ class PreprintSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'PreprintSummaryReport'
+        timeseries_recordtype_name = 'DailyPreprintSummaryReport'
 
 
-class UserSummaryReportEs8(djelme.CyclicRecord):
+class DailyUserSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = DAILY
 
     active: int
@@ -429,10 +429,10 @@ class UserSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'UserSummaryReport'
+        timeseries_recordtype_name = 'DailyUserSummaryReport'
 
 
-class SpamSummaryReportEs8(djelme.CyclicRecord):
+class MonthlySpamSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
 
     node_confirmed_spam: int
@@ -449,10 +449,10 @@ class SpamSummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'SpamSummaryReport'
+        timeseries_recordtype_name = 'MonthlySpamSummaryReport'
 
 
-class InstitutionalUserReportEs8(djelme.CyclicRecord):
+class MonthlyInstitutionalUserReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',)
 
@@ -476,10 +476,10 @@ class InstitutionalUserReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'InstitutionalUserReport'
+        timeseries_recordtype_name = 'MonthlyInstitutionalUserReport'
 
 
-class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
+class MonthlyInstitutionSummaryReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', )
 
@@ -497,10 +497,10 @@ class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'InstitutionMonthlySummaryReport'
+        timeseries_recordtype_name = 'MonthlyInstitutionSummaryReport'
 
 
-class PublicItemUsageReportEs8(djelme.CyclicRecord):
+class MonthlyPublicItemUsageReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
     UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid')
 
@@ -526,10 +526,10 @@ class PublicItemUsageReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'PublicItemUsageReport'
+        timeseries_recordtype_name = 'MonthlyPublicItemUsageReport'
 
 
-class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
+class MonthlyPrivateSpamMetricsReportEs8(djelme.CyclicRecord):
     CYCLE_TIMEDEPTH = MONTHLY
 
     node_oopspam_flagged: int
@@ -543,7 +543,7 @@ class PrivateSpamMetricsReportEs8(djelme.CyclicRecord):
 
     class Meta:
         timeseries_index_timedepth = YEARLY
-        timeseries_recordtype_name = 'PrivateSpamMetricsReport'
+        timeseries_recordtype_name = 'MonthlyPrivateSpamMetricsReport'
 
 
 ###
diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py
index bbc6d0cb655..4350c1440a1 100644
--- a/osf/metrics/reporters/download_count.py
+++ b/osf/metrics/reporters/download_count.py
@@ -1,6 +1,6 @@
 from osf.models import PageCounter
 from osf.metrics.reports import DownloadCountReport
-from osf.metrics.es8_metrics import DownloadCountReportEs8
+from osf.metrics.es8_metrics import DailyDownloadCountReportEs8
 from osf.metrics.utils import cycle_coverage_date
 from ._base import DailyReporter
 
@@ -9,7 +9,7 @@ class DownloadCountReporter(DailyReporter):
     def report(self, date):
         download_count = int(PageCounter.get_all_downloads_on_date(date) or 0)
         reports = []
-        report_es8 = DownloadCountReportEs8(
+        report_es8 = DailyDownloadCountReportEs8(
             cycle_coverage=cycle_coverage_date(date),
             daily_file_downloads=download_count,
         )
diff --git a/osf/metrics/reporters/institution_summary.py b/osf/metrics/reporters/institution_summary.py
index 1a6cfbbbca7..1148f2456e5 100644
--- a/osf/metrics/reporters/institution_summary.py
+++ b/osf/metrics/reporters/institution_summary.py
@@ -10,7 +10,7 @@
 )
 from osf.models import Institution
 from osf.metrics.es8_metrics import (
-    InstitutionSummaryReportEs8,
+    DailyInstitutionSummaryReportEs8,
     RunningTotal as RunningTotalEs8,
     NodeRunningTotals as NodeRunningTotalsEs8,
     RegistrationRunningTotals as RegistrationRunningTotalsEs8
@@ -45,7 +45,7 @@ def report(self, date):
                 created__date__lte=date,
                 type='osf.registration',
             )
-            report_es8 = InstitutionSummaryReportEs8(
+            report_es8 = DailyInstitutionSummaryReportEs8(
                 cycle_coverage=cycle_coverage_date(date),
                 institution_id=institution._id,
                 institution_name=institution.name,
diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py
index 1f9afaaf7f7..88d8e1fb891 100644
--- a/osf/metrics/reporters/institution_summary_monthly.py
+++ b/osf/metrics/reporters/institution_summary_monthly.py
@@ -5,7 +5,7 @@
 from osf.models.spam import SpamStatus
 from addons.osfstorage.models import OsfStorageFile
 from osf.metrics.reports import InstitutionMonthlySummaryReport
-from osf.metrics.es8_metrics import InstitutionMonthlySummaryReportEs8
+from osf.metrics.es8_metrics import MonthlyInstitutionSummaryReportEs8
 from osf.metrics.utils import cycle_coverage_yearmonth
 from ._base import MonthlyReporter
 
@@ -34,7 +34,7 @@ def generate_report(self, institution):
 
         preprint_queryset = self.get_published_preprints(institution, self.yearmonth)
         reports = []
-        report_es8 = InstitutionMonthlySummaryReportEs8(
+        report_es8 = MonthlyInstitutionSummaryReportEs8(
             cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             institution_id=institution._id,
             user_count=institution.get_institution_users().count(),
diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py
index ae36e59196c..31c31f8ff22 100644
--- a/osf/metrics/reporters/institutional_users.py
+++ b/osf/metrics/reporters/institutional_users.py
@@ -9,7 +9,7 @@
 from addons.osfstorage.models import OsfStorageFile
 from osf.metrics.reports import InstitutionalUserReport
 from osf.metrics.utils import YearMonth, cycle_coverage_yearmonth
-from osf.metrics.es8_metrics import InstitutionalUserReportEs8
+from osf.metrics.es8_metrics import MonthlyInstitutionalUserReportEs8
 from ._base import MonthlyReporter
 
 
@@ -50,12 +50,12 @@ class _InstiUserReportHelper:
     institution: osfdb.Institution
     user: osfdb.OSFUser
     yearmonth: YearMonth
-    reports: List[InstitutionalUserReport | InstitutionalUserReportEs8] = dataclasses.field(init=False)
+    reports: List[InstitutionalUserReport | MonthlyInstitutionalUserReportEs8] = dataclasses.field(init=False)
 
     def __post_init__(self):
         _affiliation = self.user.get_institution_affiliation(self.institution._id)
         self.reports = []
-        report_es8 = InstitutionalUserReportEs8(
+        report_es8 = MonthlyInstitutionalUserReportEs8(
             cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             institution_id=self.institution._id,
             user_id=self.user._id,
diff --git a/osf/metrics/reporters/new_user_domain.py b/osf/metrics/reporters/new_user_domain.py
index a19abaeb22f..125e02754d7 100644
--- a/osf/metrics/reporters/new_user_domain.py
+++ b/osf/metrics/reporters/new_user_domain.py
@@ -3,7 +3,7 @@
 
 from osf.models import OSFUser
 from osf.metrics.reports import NewUserDomainReport
-from osf.metrics.es8_metrics import NewUserDomainReportEs8
+from osf.metrics.es8_metrics import DailyNewUserDomainReportEs8
 from osf.metrics.utils import cycle_coverage_date
 from ._base import DailyReporter
 
@@ -24,7 +24,7 @@ def report(self, date):
         )
         reports = []
         for domain_name, count in domain_names.items():
-            report_es8 = NewUserDomainReportEs8(
+            report_es8 = DailyNewUserDomainReportEs8(
                 cycle_coverage=cycle_coverage_date(date),
                 domain_name=domain_name,
                 new_user_count=count,
diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py
index 8e9842ae78e..23f4c9bb78c 100644
--- a/osf/metrics/reporters/node_count.py
+++ b/osf/metrics/reporters/node_count.py
@@ -8,7 +8,7 @@
     RegistrationRunningTotals,
 )
 from osf.metrics.es8_metrics import (
-    NodeSummaryReportEs8,
+    DailyNodeSummaryReportEs8,
     NodeRunningTotals as NodeRunningTotalsEs8,
     RegistrationRunningTotals as RegistrationRunningTotalsEs8
 )
@@ -41,7 +41,7 @@ def report(self, date):
 
         exclude_spam = ~Q(spam_status__in=[SpamStatus.SPAM, SpamStatus.FLAGGED])
         reports = []
-        report_es8 = NodeSummaryReportEs8(
+        report_es8 = DailyNodeSummaryReportEs8(
             cycle_coverage=cycle_coverage_date(date),
             # Nodes - the number of projects and components
             nodes=NodeRunningTotalsEs8(
diff --git a/osf/metrics/reporters/osfstorage_file_count.py b/osf/metrics/reporters/osfstorage_file_count.py
index f93ed180ebb..6ddeb89945b 100644
--- a/osf/metrics/reporters/osfstorage_file_count.py
+++ b/osf/metrics/reporters/osfstorage_file_count.py
@@ -5,7 +5,7 @@
 from osf.metrics.reports import OsfstorageFileCountReport, FileRunningTotals
 from osf.models import AbstractNode, Preprint
 from osf.metrics.es8_metrics import (
-    OsfstorageFileCountReportEs8,
+    DailyOsfstorageFileCountReportEs8,
     FileRunningTotals as FileRunningTotalsEs8
 )
 from osf.metrics.utils import cycle_coverage_date
@@ -38,7 +38,7 @@ def report(self, date):
 
         reports = []
 
-        report_es8 = OsfstorageFileCountReportEs8(
+        report_es8 = DailyOsfstorageFileCountReportEs8(
             cycle_coverage=cycle_coverage_date(date),
             files=FileRunningTotalsEs8(
                 total=file_qs.count(),
diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py
index 7827f0ef40c..85ba639a32f 100644
--- a/osf/metrics/reporters/preprint_count.py
+++ b/osf/metrics/reporters/preprint_count.py
@@ -3,7 +3,7 @@
 
 from osf.metrics import PreprintSummaryReport
 from website import settings
-from osf.metrics.es8_metrics import PreprintSummaryReportEs8
+from osf.metrics.es8_metrics import DailyPreprintSummaryReportEs8
 from osf.metrics.utils import cycle_coverage_date
 from ._base import DailyReporter
 
@@ -51,7 +51,7 @@ def report(self, date):
             elastic_query = get_elastic_query(date, preprint_provider)
             resp = requests.post(f'{settings.SHARE_URL}api/v2/search/creativeworks/_search', json=elastic_query).json()
 
-            report_es8 = PreprintSummaryReportEs8(
+            report_es8 = DailyPreprintSummaryReportEs8(
                 cycle_coverage=cycle_coverage_date(date),
                 provider_key=preprint_provider._id,
                 preprint_count=resp['hits']['total'],
diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py
index 49850605cd0..fde545247e6 100644
--- a/osf/metrics/reporters/private_spam_metrics.py
+++ b/osf/metrics/reporters/private_spam_metrics.py
@@ -1,7 +1,7 @@
 from osf.metrics.reports import PrivateSpamMetricsReport
 from osf.external.oopspam.client import OOPSpamClient
 from osf.external.askismet.client import AkismetClient
-from osf.metrics.es8_metrics import PrivateSpamMetricsReportEs8
+from osf.metrics.es8_metrics import MonthlyPrivateSpamMetricsReportEs8
 from osf.metrics.utils import cycle_coverage_yearmonth
 from ._base import MonthlyReporter
 
@@ -18,7 +18,7 @@ def report(self):
 
         reports = []
 
-        report_es8 = PrivateSpamMetricsReportEs8(
+        report_es8 = MonthlyPrivateSpamMetricsReportEs8(
             cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'),
             node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'),
diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py
index d9b0dd0734c..085bac38684 100644
--- a/osf/metrics/reporters/public_item_usage.py
+++ b/osf/metrics/reporters/public_item_usage.py
@@ -4,7 +4,7 @@
 
 import waffle
 
-from osf.metrics.es8_metrics import PublicItemUsageReportEs8
+from osf.metrics.es8_metrics import MonthlyPublicItemUsageReportEs8
 
 if typing.TYPE_CHECKING:
     import elasticsearch6_dsl as edsl
@@ -135,11 +135,11 @@ def _preprintdownload_osfids(self, after_osfid: str | None) -> typing.Iterator[s
         )
         return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid)
 
-    def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | PublicItemUsageReportEs8]:
+    def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | MonthlyPublicItemUsageReportEs8]:
         if not _is_item_public(osf_obj):
             raise _SkipItem
         reports = []
-        report_es8 = PublicItemUsageReportEs8(
+        report_es8 = MonthlyPublicItemUsageReportEs8(
             cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             item_osfid=osf_obj._id,
             item_type=[get_item_type(osf_obj)],
diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py
index 23c74697a54..2fbac671ad1 100644
--- a/osf/metrics/reporters/spam_count.py
+++ b/osf/metrics/reporters/spam_count.py
@@ -3,7 +3,7 @@
 from osf.metrics.reports import SpamSummaryReport
 from osf.models import PreprintLog, NodeLog
 from osf.models.spam import SpamStatus
-from osf.metrics.es8_metrics import SpamSummaryReportEs8
+from osf.metrics.es8_metrics import MonthlySpamSummaryReportEs8
 from osf.metrics.utils import cycle_coverage_yearmonth
 from ._base import MonthlyReporter
 
@@ -14,7 +14,7 @@ def report(self, **report_kwargs):
         target_month = self.yearmonth.month_start()
         next_month = self.yearmonth.month_end()
         reports = []
-        report_es8 = SpamSummaryReportEs8(
+        report_es8 = MonthlySpamSummaryReportEs8(
             cycle_coverage=cycle_coverage_yearmonth(self.yearmonth),
             node_confirmed_spam=NodeLog.objects.filter(
                 action=NodeLog.CONFIRM_SPAM,
diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index d17528f98fb..0c4662b55af 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -14,7 +14,7 @@
 from osf.models import SpamStatus, Tag
 from website import settings
 from osf.metrics.es8_metrics import (
-    StorageAddonUsageEs8,
+    DailyStorageAddonUsageReportEs8,
     UsageByStorageAddon as UsageByStorageAddonEs8,
     RunningTotal as RunningTotalEs8
 )
@@ -201,7 +201,7 @@ def report(self, date):
             )
             usage_by_addon.append(usage_by_storage_addon)
         reports = []
-        report_es8 = StorageAddonUsageEs8(
+        report_es8 = DailyStorageAddonUsageReportEs8(
             cycle_coverage=cycle_coverage_date(date),
             usage_by_addon=usage_by_addon,
         )
diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py
index 089fcb63f47..121b830c466 100644
--- a/osf/metrics/reporters/user_count.py
+++ b/osf/metrics/reporters/user_count.py
@@ -1,7 +1,7 @@
 from osf.models import OSFUser
 
 from osf.metrics import UserSummaryReport
-from osf.metrics.es8_metrics import UserSummaryReportEs8
+from osf.metrics.es8_metrics import DailyUserSummaryReportEs8
 from osf.metrics.utils import cycle_coverage_date
 from ._base import DailyReporter
 
@@ -10,7 +10,7 @@ class UserCountReporter(DailyReporter):
 
     def report(self, report_date):
         reports = []
-        report_es8 = UserSummaryReportEs8(
+        report_es8 = DailyUserSummaryReportEs8(
             cycle_coverage=cycle_coverage_date(report_date),
             active=OSFUser.objects.filter(is_active=True, date_confirmed__date__lte=report_date).count(),
             deactivated=OSFUser.objects.filter(date_disabled__isnull=False, date_disabled__date__lte=report_date).count(),
diff --git a/osf/models/registrations.py b/osf/models/registrations.py
index 4afc929ba79..f13489f1201 100644
--- a/osf/models/registrations.py
+++ b/osf/models/registrations.py
@@ -25,7 +25,7 @@
 from osf.exceptions import NodeStateError, DraftRegistrationStateError
 from osf.external.internet_archive.tasks import archive_to_ia, update_ia_metadata
 from osf.metrics import RegistriesModerationMetrics
-from osf.metrics.es8_metrics import RegistriesModerationMetricsEs8
+from osf.metrics.es8_metrics import RegistriesModerationEventEs8
 from osf.models.notification_type import NotificationTypeEnum
 from .action import RegistrationAction
 from .archive import ArchiveJob
@@ -787,7 +787,7 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment
         action.save()
         if waffle.switch_is_active(features.ELASTICSEARCH_METRICS):
             RegistriesModerationMetrics.record_transitions(action)
-            RegistriesModerationMetricsEs8.record(
+            RegistriesModerationEventEs8.record(
                 registration_id=action.target._id,
                 provider_id=action.target.provider._id,
                 from_state=action.from_state,
diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py
index a871054e96b..5bc6e4c4bc4 100644
--- a/osf_tests/metrics/test_es8_metrics.py
+++ b/osf_tests/metrics/test_es8_metrics.py
@@ -5,8 +5,8 @@
 
 from osf.metrics.es8_metrics import (
     PageviewInfo,
-    DownloadCountReportEs8,
-    OsfCountedUsageRecord,
+    DailyDownloadCountReportEs8,
+    OsfCountedUsageEvent,
 )
 
 
@@ -19,7 +19,7 @@ def _real_elastic(self):
             yield
 
     def test_nested_pageview_autofill(self):
-        usage = OsfCountedUsageRecord.record(
+        usage = OsfCountedUsageEvent.record(
             timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
             sessionhour_id='blah',
             database_iri='https://osf.example/provider',
@@ -42,7 +42,7 @@ def test_nested_pageview_autofill(self):
         assert usage.item_iri in usage.within_iris
 
     def test_nested_pageview_autofill_dict(self):
-        usage = OsfCountedUsageRecord.record(
+        usage = OsfCountedUsageEvent.record(
             timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
             sessionhour_id='blah',
             database_iri='https://osf.example/provider',
@@ -65,7 +65,7 @@ def test_nested_pageview_autofill_dict(self):
         assert usage.item_iri in usage.within_iris
 
     def test_none_pageview_nested_autofill(self):
-        usage = OsfCountedUsageRecord.record(
+        usage = OsfCountedUsageEvent.record(
             timestamp=datetime.datetime(2024, 1, 1, 15, 0, tzinfo=datetime.UTC),
             sessionhour_id='blah',
             database_iri='https://osf.example/provider',
@@ -80,12 +80,12 @@ def test_none_pageview_nested_autofill(self):
         assert usage.item_iri in usage.within_iris
 
     def test_save_report(self):
-        _saved = DownloadCountReportEs8.record(
+        _saved = DailyDownloadCountReportEs8.record(
             cycle_coverage='2026.1.1',
             daily_file_downloads=17,
         )
-        DownloadCountReportEs8.refresh()
-        _response = DownloadCountReportEs8.search().execute()
+        DailyDownloadCountReportEs8.refresh()
+        _response = DailyDownloadCountReportEs8.search().execute()
         (_fetched,) = _response
         assert _fetched.meta.id == _saved.meta.id
         assert _fetched.cycle_coverage == '2026.1.1'

From ea6ac1e1d104856c86560ab213e75d9fffa14efe Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 13:10:12 -0400
Subject: [PATCH 095/100] fix: broken import

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index a54cfca0b61..4864ff44d1f 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -44,7 +44,7 @@
 
 _UNCHANGED_RECORDTYPES = {
     # reports
-    es6_reports.StorageAddonUsage: es8_metrics.DailyStorageAddonUsageEs8,
+    es6_reports.StorageAddonUsage: es8_metrics.DailyStorageAddonUsageReportEs8,
     es6_reports.DownloadCountReport: es8_metrics.DailyDownloadCountReportEs8,
     es6_reports.InstitutionSummaryReport: es8_metrics.DailyInstitutionSummaryReportEs8,
     es6_reports.NewUserDomainReport: es8_metrics.DailyNewUserDomainReportEs8,

From 093066eb083c0c0036f6b18e813b6b6a787d885a Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 16:42:50 -0400
Subject: [PATCH 096/100] fix: idempotent event migration

---
 .../commands/migrate_osfmetrics_6to8.py       | 34 +++++++++----------
 osf/metrics/es8_metrics.py                    |  6 ++--
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 4864ff44d1f..ef862046393 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -2,7 +2,6 @@
 import datetime
 import functools
 import logging
-import uuid
 
 from django.apps import apps
 from django.core.management import call_command
@@ -111,7 +110,7 @@ def migrate_preprint_views(from_when: str, until_when: str):
     # PreprintView => OsfCountedUsageEvent
     _action_labels = ['view', 'web']
     _each_new = (
-        _convert_preprint_metric(_hit['_source'], _action_labels)
+        _convert_preprint_metric(_hit, _action_labels)
         for _hit in _es6_scan_range(
             PreprintView, from_when=from_when, until_when=until_when
         )
@@ -124,7 +123,7 @@ def migrate_preprint_downloads(from_when: str, until_when: str):
     # PreprintDownload => OsfCountedUsageEvent
     _action_labels = ['download']
     _each_new = (
-        _convert_preprint_metric(_hit['_source'], _action_labels)
+        _convert_preprint_metric(_hit, _action_labels)
         for _hit in _es6_scan_range(
             PreprintDownload, from_when=from_when, until_when=until_when
         )
@@ -335,35 +334,37 @@ def _convert_counted_usage(source: dict) -> es8_metrics.OsfCountedUsageEvent:
             source.get('item_type'),
             has_surrounding_items=bool(source.get('surrounding_guids')),
         ),
-        item_public=source.get('item_public'),
-        provider_id=source.get('provider_id'),
-        user_is_authenticated=source.get('user_is_authenticated'),
+        item_public=source.get('item_public', True),
+        provider_id=source.get('provider_id', 'osf'),
+        user_is_authenticated=source.get('user_is_authenticated', False),
         action_labels=source.get('action_labels'),
         pageview_info=source.get('pageview_info'),
     )
 
 
 def _convert_preprint_metric(
-    source: dict, action_labels: list[str]
+    hit: dict, action_labels: list[str]
 ) -> es8_metrics.OsfCountedUsageEvent:
+    _source = hit['_source']
+    _doc_id = hit['_id']
     return es8_metrics.OsfCountedUsageEvent.record(
         using=False,  # don't save yet; will save in bulk
         # fields used to compute a sessionhour_id:
-        timestamp=datetime.datetime.fromisoformat(source['timestamp']),
-        user_id=source.get('user_id'),
-        client_session_id=str(uuid.uuid4()),
+        timestamp=datetime.datetime.fromisoformat(_source['timestamp']),
+        user_id=_source.get('user_id'),
+        client_session_id=_doc_id,  # unique session per event (best can do)
         # fields from djelme.CountedUsageRecord:
         platform_iri=website_settings.DOMAIN,
         database_iri=_convert_database_iri(
-            provider_id=source.get('provider_id'),
+            provider_id=_source.get('provider_id'),
             osf_model_name='preprint',
         ),
         # fields from OsfCountedUsageEvent:
-        item_osfid=source['preprint_id'],
+        item_osfid=_source['preprint_id'],
         item_type=OSF.Preprint,
         item_public=True,
-        provider_id=source.get('provider_id'),
-        user_is_authenticated=bool(source.get('user_id')),
+        provider_id=_source.get('provider_id'),
+        user_is_authenticated=bool(_source.get('user_id')),
         action_labels=action_labels,
     )
 
@@ -508,10 +509,7 @@ def _cumulative_preprint_count(preprint_metric_cls, osfid: str, until_when: str)
 
 def _convert_item_type(osf_model_name: str | list[str] | None, has_surrounding_items: bool):
     if isinstance(osf_model_name, list):
-        return [
-            _convert_item_type(_model_name, has_surrounding_items)
-            for _model_name in osf_model_name
-        ]
+        osf_model_name = osf_model_name[0] if osf_model_name else None
     if osf_model_name:
         try:
             return osfmap_type_from_model(
diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py
index ad2ba5d9018..1d73009ed5b 100644
--- a/osf/metrics/es8_metrics.py
+++ b/osf/metrics/es8_metrics.py
@@ -155,14 +155,14 @@ def _timestamp_30sec_window(self):
         # for UNIQUE_TOGETHER_FIELDS
         # slice the day into an array of 30-second windows,
         # find this timestamp's windowslice index
-        day_start = datetime.datetime(
+        _day_start = datetime.datetime(
             self.timestamp.year,
             self.timestamp.month,
             self.timestamp.day,
             tzinfo=self.timestamp.tzinfo,
         )
-        time_in_seconds = (self.timestamp - day_start).total_seconds()
-        return int(time_in_seconds / 30)  # 30-second windows
+        _time_in_seconds = (self.timestamp - _day_start).total_seconds()
+        return int(_time_in_seconds / 30)  # 30-second windows
 
     @functools.cached_property
     def _osfid_referent(self):

From df9d1180ef7e410e7c509e0911daa6ac62710504 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Wed, 29 Apr 2026 16:47:36 -0400
Subject: [PATCH 097/100] allow clearing migration targets

---
 admin/management/views.py                     |  1 +
 admin/templates/management/commands.html      |  3 ++-
 .../commands/migrate_osfmetrics_6to8.py       | 23 +++++++++++++++++++
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/admin/management/views.py b/admin/management/views.py
index 294c79afcaf..3c112347529 100644
--- a/admin/management/views.py
+++ b/admin/management/views.py
@@ -202,6 +202,7 @@ def post(self, request):
             'no_color': True,
             'no_counts': request.POST.get('no_counts'),
             'clear_state': request.POST.get('clear_state'),
+            'clear_es8_data': request.POST.get('clear_es8_data'),
             'start': request.POST.get('start'),
             'unchanged': request.POST.get('unchanged'),
             'usage_reports': request.POST.get('usage_reports'),
diff --git a/admin/templates/management/commands.html b/admin/templates/management/commands.html
index 8439612902d..6b9ee927e0d 100644
--- a/admin/templates/management/commands.html
+++ b/admin/templates/management/commands.html
@@ -187,9 +187,10 @@ <h4><u>migrate osf-metrics 6to8</u></h4>
                       action="{% url 'management:migrate_osfmetrics_6to8'%}"
                       style="display: flex; flex-direction: column;">
                     {% csrf_token %}
-                    <label><input type="checkbox" name="start"> start tasks (caution)</label>
                     <label><input type="checkbox" name="no_counts"> no counts</label>
+                    <label><input type="checkbox" name="start"> start tasks (caution)</label>
                     <label><input type="checkbox" name="clear_state"> reset migration start time (caution)</label>
+                    <label><input type="checkbox" name="clear_es8_data"> clear es8 data (big caution)</label>
                     <fieldset>
                         (narrow types:
                         <label><input type="checkbox" name="unchanged"> unchanged events and reports</label>
diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index ef862046393..2d087869fb1 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -575,6 +575,10 @@ def add_arguments(self, parser):
             '--clear-state',
             action='store_true',
         )
+        parser.add_argument(
+            '--clear-es8-data',
+            action='store_true',
+        )
         parser.add_argument(
             '--start',
             action='store_true',
@@ -602,6 +606,7 @@ def handle(
         no_setup,
         no_counts,
         clear_state,
+        clear_es8_data,
         start,
         unchanged,
         usage_events,
@@ -613,6 +618,8 @@ def handle(
             call_command('djelme_backend_setup')
         if clear_state:
             self._clear_state()
+        if clear_es8_data:
+            self._clear_es8_data(unchanged, usage_events, usage_reports)
         self._check_started_at(start_now=start)
         _default_all = not any((unchanged, usage_events, usage_reports))
         if unchanged or _default_all:
@@ -754,6 +761,22 @@ def _clear_state(self):
         es8_metrics.Elastic6To8State.search().query({'match_all': {}}).delete()
         es8_metrics.Elastic6To8State.refresh()
 
+    def _clear_es8_data(self, unchanged, usage_events, usage_reports):  # delete es8 docs for the selected migration targets
+        _default_all = not any((unchanged, usage_events, usage_reports))  # no narrowing flag given: clear every target
+        _to_clear = []  # es8 record types whose documents will be deleted
+        if _default_all or unchanged:
+            _to_clear.extend(_UNCHANGED_RECORDTYPES.values())
+        if _default_all or usage_events:
+            _to_clear.append(es8_metrics.OsfCountedUsageEvent)  # events: what the usage-event migration writes
+        if _default_all or usage_reports:
+            _to_clear.append(es8_metrics.MonthlyPublicItemUsageReportEs8)  # reports: what the usage-report step writes
+        for _es8_recordtype in _to_clear:
+            self.stdout.write(
+                f'clearing {_es8_recordtype.__name__}', self.style.NOTICE
+            )
+            _es8_recordtype.search().query({'match_all': {}}).delete()
+            _es8_recordtype.refresh()  # refresh so the deletions are visible to the counts/migration that follow
+
     def _eq_style(self, num: int, should_be: int):
         return self.style.SUCCESS if (num == should_be) else self.style.WARNING
 

From 655d9dc73a01da818bfee6229ff59d0b4cc37bd3 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 30 Apr 2026 08:54:02 -0400
Subject: [PATCH 098/100] fix: skip gv addons in storage_addon_usage

---
 osf/metrics/reporters/storage_addon_usage.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py
index 0c4662b55af..893373cebd1 100644
--- a/osf/metrics/reporters/storage_addon_usage.py
+++ b/osf/metrics/reporters/storage_addon_usage.py
@@ -134,8 +134,13 @@ def report(self, date):
         usage_by_addon_es8 = []
         usage_by_addon = []
         for short_name, addon_config in storage_addon_configs.items():
-            user_counts = storage_addon_user_counts(date, addon_config.get_model('UserSettings'))
-            node_counts = storage_addon_node_counts(date, addon_config.get_model('NodeSettings'))
+            try:
+                _UserSettings = addon_config.get_model('UserSettings')
+                _NodeSettings = addon_config.get_model('NodeSettings')
+            except LookupError:
+                continue
+            user_counts = storage_addon_user_counts(date, _UserSettings)
+            node_counts = storage_addon_node_counts(date, _NodeSettings)
             usage_by_storage_addon_es_8 = UsageByStorageAddonEs8(
                 addon_shortname=short_name,
                 enabled_usersettings=RunningTotalEs8(

From ceb9409c31a7b3b570afacb1e05f305f150ef019 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Thu, 30 Apr 2026 09:26:58 -0400
Subject: [PATCH 099/100] fix: es6 usage count to migrate

---
 osf/management/commands/migrate_osfmetrics_6to8.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/osf/management/commands/migrate_osfmetrics_6to8.py b/osf/management/commands/migrate_osfmetrics_6to8.py
index 2d087869fb1..0c71d7b2307 100644
--- a/osf/management/commands/migrate_osfmetrics_6to8.py
+++ b/osf/management/commands/migrate_osfmetrics_6to8.py
@@ -669,9 +669,14 @@ def _handle_usage_events(self, *, start: bool, no_counts: bool):
                     }
                 }
             }
+            _es6_usage_count_q = {
+                'bool': {
+                    'filter': [_range_q, {'exists': {'field': 'item_guid'}}],
+                },
+            }
             _es6_pview_count = PreprintView.search().filter(_range_q).count()
             _es6_pdownload_count = PreprintDownload.search().filter(_range_q).count()
-            _es6_usage_event_count = CountedUsageEs6.search().filter(_range_q).count()
+            _es6_usage_event_count = CountedUsageEs6.search().filter(_es6_usage_count_q).count()
             _es6_count = (
                 _es6_pview_count + _es6_pdownload_count + _es6_usage_event_count
             )
@@ -720,7 +725,7 @@ def _handle_usage_reports(self, *, start: bool, no_counts: bool):
             self._write_tabbed(
                 'es8',
                 es8_metrics.MonthlyPublicItemUsageReportEs8,
-                '(items)',
+                'osfid count:',
                 _es8_item_count,
                 style=self._eq_style(_es8_item_count, _es6_item_count),
             )

From aa4025c806d8fbfc3b8a3d01b2e08cd064da1ac6 Mon Sep 17 00:00:00 2001
From: abram axel booth <boothaa@gmail.com>
Date: Mon, 4 May 2026 16:53:01 -0400
Subject: [PATCH 100/100] fix: monthly institution reporter

---
 osf/management/commands/monthly_reporters_go.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py
index 6e2b1c9bc72..c5dd4034777 100644
--- a/osf/management/commands/monthly_reporters_go.py
+++ b/osf/management/commands/monthly_reporters_go.py
@@ -3,12 +3,14 @@
 
 from django.core.management.base import BaseCommand
 from django.db import OperationalError as DjangoOperationalError
-from elasticsearch6.exceptions import ConnectionError as ElasticConnectionError
+from elasticsearch6.exceptions import ConnectionError as Elastic6ConnectionError
+from elasticsearch8.exceptions import ConnectionError as Elastic8ConnectionError
 from psycopg2 import OperationalError as PostgresOperationalError
 
 from framework.celery_tasks import app as celery_app
 import framework.sentry
 from osf.metrics.reporters import AllMonthlyReporters
+from osf.metrics.reports import MonthlyReport
 from osf.metrics.utils import YearMonth
 
 
@@ -17,7 +19,8 @@
 
 _CONTINUE_AFTER_ERRORS = (
     DjangoOperationalError,
-    ElasticConnectionError,
+    Elastic6ConnectionError,
+    Elastic8ConnectionError,
     PostgresOperationalError,
 )
 
@@ -70,11 +73,7 @@ def schedule_monthly_reporter(
 
 @celery_app.task(
     name='management.commands.monthly_reporter_do',
-    autoretry_for=(
-        DjangoOperationalError,
-        ElasticConnectionError,
-        PostgresOperationalError,
-    ),
+    autoretry_for=_CONTINUE_AFTER_ERRORS,
     max_retries=5,
     retry_backoff=True,
 )
@@ -87,7 +86,8 @@ def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict):
 
     _reports = _reporter.report(**report_kwargs)
     for _report in _reports:
-        _report.report_yearmonth = _reporter.yearmonth
+        if isinstance(_report, MonthlyReport) and (_report.report_yearmonth is None):
+            _report.report_yearmonth = _reporter.yearmonth
         _report.save()
         _followup_task = _reporter.followup_task(_report)
         if _followup_task is not None: