From f872ce5d31996808e801c7f5020d1e47cbf6dd73 Mon Sep 17 00:00:00 2001 From: Deep Joshi Date: Wed, 10 Jun 2026 19:37:05 +0530 Subject: [PATCH 1/4] Update source URL and modify log file structure --- utils/arc_agi/adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/arc_agi/adapter.py b/utils/arc_agi/adapter.py index c209569a3..a8af95c7e 100644 --- a/utils/arc_agi/adapter.py +++ b/utils/arc_agi/adapter.py @@ -11,8 +11,7 @@ from every_eval_ever.helpers import SCHEMA_VERSION -SOURCE_URL = "https://arcprize.org/media/data/leaderboard/evaluations.json" - +SOURCE_URL = "https://github.com/fchollet/ARC-AGI/tree/master/data" def make_source_data() -> dict: return { @@ -307,9 +306,10 @@ def make_log( def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: - out_dir = out_root / "arc-agi" / developer / model + filename = uuid.uuid4() + out_dir = out_root / filename[0:2] / filename[2:4] out_dir.mkdir(parents=True, exist_ok=True) - out_path = out_dir / f"{uuid.uuid4()}.json" + out_path = out_dir / f"{filename}.json" out_path.write_text(json.dumps(log, indent=2) + "\n", encoding="utf-8") return out_path From b92fab59c4eeaae5d8d19f4981628e576b16685b Mon Sep 17 00:00:00 2001 From: Deep Joshi Date: Wed, 10 Jun 2026 19:41:46 +0530 Subject: [PATCH 2/4] Fix UUID handling and improve log writing format --- utils/arc_agi/adapter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/arc_agi/adapter.py b/utils/arc_agi/adapter.py index a8af95c7e..4e8191a62 100644 --- a/utils/arc_agi/adapter.py +++ b/utils/arc_agi/adapter.py @@ -306,11 +306,14 @@ def make_log( def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: - filename = uuid.uuid4() - out_dir = out_root / filename[0:2] / filename[2:4] + filename = str(uuid.uuid4()) + out_dir = out_root / filename[:2] / filename[2:4] out_dir.mkdir(parents=True, exist_ok=True) out_path = out_dir / f"{filename}.json" - out_path.write_text(json.dumps(log, indent=2) + "\n", encoding="utf-8") + out_path.write_text( + json.dumps(log, indent=2) + "\n", + encoding="utf-8" + ) return out_path From 83007f4a562b90c8019da272b43e2291d5728f7d Mon Sep 17 00:00:00 2001 From: Deep Joshi Date: Thu, 11 Jun 2026 18:33:25 +0530 Subject: [PATCH 3/4] added aggregate and instance level jsonl --- utils/arc_agi/adapter.py | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/utils/arc_agi/adapter.py b/utils/arc_agi/adapter.py index 4e8191a62..ef35667b3 100644 --- a/utils/arc_agi/adapter.py +++ b/utils/arc_agi/adapter.py @@ -2,6 +2,7 @@ from __future__ import annotations import argparse +import hashlib import json import re import time @@ -11,7 +12,8 @@ from every_eval_ever.helpers import SCHEMA_VERSION -SOURCE_URL = "https://github.com/fchollet/ARC-AGI/tree/master/data" +SOURCE_URL = "https://arcprize.org/media/data/leaderboard/evaluations.json" + def make_source_data() -> dict: return { @@ -306,14 +308,61 @@ def make_log( def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: - filename = str(uuid.uuid4()) - out_dir = out_root / filename[:2] / filename[2:4] + out_dir = out_root / "arc-agi" / developer / model out_dir.mkdir(parents=True, exist_ok=True) - out_path = out_dir / f"{filename}.json" - out_path.write_text( - json.dumps(log, indent=2) + "\n", - encoding="utf-8" - ) + uuid_str = str(uuid.uuid4()) + out_path = out_dir / f"{uuid_str}.json" + + content_str = json.dumps(log, indent=2) + "\n" + content_bytes = content_str.encode("utf-8") + out_path.write_bytes(content_bytes) + + size_bytes = len(content_bytes) + sha256 = hashlib.sha256(content_bytes).hexdigest() + + try: + legacy_path = out_path.relative_to(out_root.parent).as_posix() + except ValueError: + legacy_path = f"data/arc-agi/{developer}/{model}/{uuid_str}.json" + + object_path = f"flat/objects/{uuid_str[:2]}/{uuid_str[2:4]}/{uuid_str}.json" + + aggregate_record = { + "benchmark": "arc-agi", + "eval_schema_version": SCHEMA_VERSION, + "legacy_path": legacy_path, + "object_path": object_path, + "object_uuid": uuid_str, + "record_type": "aggregate", + "sha256": sha256, + "size_bytes": size_bytes + } + + instance_record = { + "benchmark": "arc-agi", + "eval_schema_version": SCHEMA_VERSION, + "instance_object_path": None, + "instance_sha256": None, + "instance_size_bytes": None, + "legacy_path": legacy_path, + "object_path": object_path, + "object_uuid": uuid_str, + "record_type": "aggregate", + "sha256": sha256, + "size_bytes": size_bytes + } + + index_dir = out_dir / "indexes" / "by_collection" / "arc-agi" + index_dir.mkdir(parents=True, exist_ok=True) + + aggregate_index_path = index_dir / "aggregate.jsonl" + with open(aggregate_index_path, "a", encoding="utf-8") as f: + f.write(json.dumps(aggregate_record) + "\n") + + instance_index_path = index_dir / "instance_level.jsonl" + with open(instance_index_path, "a", encoding="utf-8") as f: + f.write(json.dumps(instance_record) + "\n") + return out_path From fa4896824d819eba9735b570e8f7f2ef3bc4fc0d Mon Sep 17 00:00:00 2001 From: Deep Joshi Date: Thu, 11 Jun 2026 18:40:17 +0530 Subject: [PATCH 4/4] Refactor log writing to use UUID filename segments --- utils/arc_agi/adapter.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/utils/arc_agi/adapter.py b/utils/arc_agi/adapter.py index ef35667b3..b9c4ff43e 100644 --- a/utils/arc_agi/adapter.py +++ b/utils/arc_agi/adapter.py @@ -308,10 +308,10 @@ def make_log( def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: - out_dir = out_root / "arc-agi" / developer / model + filename = str(uuid.uuid4()) + out_dir = out_root / filename[:2] / filename[2:4] out_dir.mkdir(parents=True, exist_ok=True) - uuid_str = str(uuid.uuid4()) - out_path = out_dir / f"{uuid_str}.json" + out_path = out_dir / f"{filename}.json" content_str = json.dumps(log, indent=2) + "\n" content_bytes = content_str.encode("utf-8") @@ -323,16 +323,16 @@ def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: try: legacy_path = out_path.relative_to(out_root.parent).as_posix() except ValueError: - legacy_path = f"data/arc-agi/{developer}/{model}/{uuid_str}.json" + legacy_path = f"data/arc-agi/{developer}/{model}/{filename}.json" - object_path = f"flat/objects/{uuid_str[:2]}/{uuid_str[2:4]}/{uuid_str}.json" + object_path = f"flat/objects/{filename[:2]}/{filename[2:4]}/{filename}.json" aggregate_record = { "benchmark": "arc-agi", "eval_schema_version": SCHEMA_VERSION, "legacy_path": legacy_path, "object_path": object_path, - "object_uuid": uuid_str, + "object_uuid": filename, "record_type": "aggregate", "sha256": sha256, "size_bytes": size_bytes @@ -346,13 +346,13 @@ def write_log(log: dict, out_root: Path, developer: str, model: str) -> Path: "instance_size_bytes": None, "legacy_path": legacy_path, "object_path": object_path, - "object_uuid": uuid_str, + "object_uuid": filename, "record_type": "aggregate", "sha256": sha256, "size_bytes": size_bytes } - index_dir = out_dir / "indexes" / "by_collection" / "arc-agi" + index_dir = out_root / "indexes" / "by_collection" / "arc-agi" index_dir.mkdir(parents=True, exist_ok=True) aggregate_index_path = index_dir / "aggregate.jsonl"