ServiceNow · ehsk · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · May 4, 2026
diff --git a/conf/finetune/gspo.yaml b/conf/finetune/gspo.yaml
@@ -0,0 +1,9 @@
+defaults:
+  - base
+  - _self_
+
+attempts: 8
+rl:
+  policy_loss: gspo
+  epsilon_high: 4e-4
+  epsilon_low: 3e-4
diff --git a/conf/swe.yaml b/conf/swe.yaml
@@ -0,0 +1,53 @@
+defaults:
+    - base
+    - _self_
+    - override finetune: gspo
+
+model_path: Qwen/Qwen3-8B
+
+actor:
+  rollout_policy: pipelinerl.domains.swe.rollouts.generate_swe_rollout
+  success_threshold: 0.8
+
+environments: null
+
+dataset_loader: pipelinerl.domains.swe.load_datasets.load_local_swe_dataset
+dataset_loader_params:
+  seed: ${seed}
+  # max_samples: 1000  # uncomment to cap the number of loaded samples (applies to both train and test)
+
+# HuggingFace Hub dataset IDs (or local disk paths).
+# Append ":split" to restrict to a specific split, e.g. SWE-bench/SWE-smith-py:train
+# NOTE: SWE-smith and SWE-bench_Verified do not ship with gold_file_contents —
+# run the preprocessor once per dataset before training, e.g.:
+#   python -m pipelinerl.domains.swe.swe_preprocessor --config-name=swe/preprocess \
+#     hf_dataset_name=SWE-bench/SWE-bench_Verified hf_split_name=test \
+#     dataset_path=/your/output/path repo_path=/your/repos/cache
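+# then point the entries below at the resulting dataset_path on disk. The loader skips
+# every row without gold_file_contents, so an unpreprocessed dataset yields no samples.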
+train_dataset_names:
+  - SWE-bench/SWE-smith
+test_dataset_names:
+  - SWE-bench/SWE-bench_Verified
+
+finetune:
+  seq_length: 24000
+  rl:
+    filter_zero_advantage_groups: false
+
+vllm_config:
+  vllm_kwargs:
+    max_model_len: 24000
+
+llm:
+  parameters:
+    max_tokens: 4096
+    temperature: 1.0
+  chat_template_kwargs:
+    enable_thinking: false
+
+test_llm:
+  parameters:
+    max_tokens: 4096
+    temperature: 0.0
+  chat_template_kwargs:
+    enable_thinking: false
diff --git a/conf/swe/preprocess.yaml b/conf/swe/preprocess.yaml
@@ -0,0 +1,15 @@
+# Config for swe_preprocessor.py.
+# Clones repos, extracts gold_file_contents at base_commit, applies token
+# filtering, and saves a training-ready HuggingFace disk dataset.
+#
+# Run: python -m pipelinerl.domains.swe.swe_preprocessor --config-name=swe/preprocess
+
+hf_dataset_name: SWE-bench/SWE-smith-py
+hf_split_name: train
+repo_path: /path/to/repos
+dataset_path: /path/to/output_ds
+tokenizer_model: Qwen/Qwen3-8B
+min_token_threshold: null   # set to an int to filter out very short examples
+max_token_threshold: 16000  # set to null to disable
+num_map_processes: 32
+force_reprocess: false
diff --git a/pipelinerl/domains/swe/load_datasets.py b/pipelinerl/domains/swe/load_datasets.py
@@ -0,0 +1,127 @@
+# Supported datasets
+# ──────────────────────────────────────────────────────────────────────────────
+# Ready to use (have gold_file_contents pre-extracted):
+#   SWE-bench/SWE-smith-py        local preprocessed disk dataset or Hub ID
+#   SWE-bench/SWE-smith-java      "
+#   SWE-bench/SWE-smith-rs        "
+#   SWE-bench/SWE-smith-go        "
+#
+# Require preprocessing first (clone repos, extract file contents at base_commit):
+#   princeton-nlp/SWE-bench
+#   princeton-nlp/SWE-bench_Lite
+#   princeton-nlp/SWE-bench_Verified
+#   SWE-bench/SWE-Pro (if/when released publicly)
+#
+#   Run: python -m pipelinerl.domains.swe.swe_preprocessor --config-name=swe/preprocess
+# ──────────────────────────────────────────────────────────────────────────────
+
+import json
+import logging
+import os
+import random
+from typing import Any, Dict, List, Optional
+
+from datasets import load_dataset, load_from_disk
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_file_contents(raw: Any) -> Dict[str, str]:
+    """Normalise a raw ``gold_file_contents`` value into a ``{path: content}`` dict.
+
+    Accepts either a mapping or a JSON string that decodes to a mapping; keys
+    and values are converted with ``str``. Anything else (other types, invalid
+    JSON, JSON that is not an object) yields an empty dict.
+    """
+    mapping = raw
+    if isinstance(mapping, str):
+        # Serialized form: decode first, then validate like any other value.
+        try:
+            mapping = json.loads(mapping)
+        except (json.JSONDecodeError, TypeError):
+            return {}
+    if not isinstance(mapping, dict):
+        return {}
+    return {str(name): str(text) for name, text in mapping.items()}
+
+
+def _load_single_dataset(path: str) -> List[Dict]:
+    """Load one dataset and convert its rows into SWE sample dicts.
+
+    ``path`` is interpreted as follows:
+
+    Local path:   /path/to/ds_train               (used when the path exists on disk)
+    Hub ID:       SWE-bench/SWE-smith-py          (all splits concatenated)
+    Hub ID+split: SWE-bench/SWE-smith-py:train
+
+    Rows whose ``gold_file_contents`` is missing, empty or unparseable are
+    dropped, and the number of dropped rows is logged as a warning. Rows that
+    raise while being converted are logged and skipped.
+
+    Returns:
+        A list of dicts with the keys ``id``, ``dataset``, ``repo``,
+        ``base_commit``, ``problem_statement``, ``patch`` and ``file_contents``.
+    """
+    # Function-scope import, so the module's top-level import line stays untouched.
+    from datasets import DatasetDict, concatenate_datasets
+
+    if os.path.exists(path):
+        logger.info("Loading from disk: %s", path)
+        loaded = load_from_disk(path)
+    else:
+        # Hub ID, optionally with ":split" suffix
+        if ":" in path:
+            hub_id, split = path.rsplit(":", 1)
+        else:
+            hub_id, split = path, None
+
+        logger.info("Loading from HuggingFace Hub: %s (split=%s)", hub_id, split or "all")
+        loaded = load_dataset(hub_id, split=split)
+
+    # Either loader may return a DatasetDict (a saved multi-split dataset, or a
+    # Hub load without an explicit split). Iterating a DatasetDict yields split
+    # names rather than rows, so flatten it into a single dataset first.
+    if isinstance(loaded, DatasetDict):
+        dataset = concatenate_datasets(list(loaded.values()))
+    else:
+        dataset = loaded
+
+    logger.info("Loaded %d rows from %s", len(dataset), path)
+
+    samples: List[Dict] = []
+    missing_contents = 0
+    for row in dataset:
+        try:
+            item = dict(row)
+            file_contents = _parse_file_contents(item.get("gold_file_contents", "{}"))
+            if not file_contents:
+                missing_contents += 1
+                continue
+            samples.append({
+                # First non-empty identifier among the column names in use.
+                "id": item.get("id", "") or item.get("instance_id", "") or item.get("issue_id", ""),
+                # Fall back to the requested path when the row carries no dataset tag.
+                "dataset": item.get("dataset", "") or path,
+                "repo": item.get("repo", ""),
+                "base_commit": item.get("base_commit", ""),
+                "problem_statement": item.get("problem_statement", ""),
+                "patch": item.get("patch", ""),
+                "file_contents": file_contents,
+            })
+        except Exception as e:
+            logger.warning("Skipping malformed item: %s", e)
+
+    if missing_contents:
+        logger.warning(
+            "Skipped %d of %d rows from %s with no usable gold_file_contents",
+            missing_contents, len(dataset), path,
+        )
+
+    return samples
+
+
+def load_local_swe_dataset(
+    dataset_paths: List[str],
+    seed: int = 42,
+    max_samples: Optional[int] = None,
+) -> List[Dict]:
+    """
+    Load one or more SWE-style datasets and return a combined, shuffled list.
+
+    Args:
+        dataset_paths: Passed via cfg.train_dataset_names / cfg.test_dataset_names.
+                       Each entry is a local HuggingFace disk dataset path, a Hub ID,
+                       or a "hub_id:split" string (see _load_single_dataset).
+                       Add multiple entries to mix datasets (e.g. swe-smith + swe-bench).
+        seed: Random seed for shuffling (inherit from cfg.seed via dataset_loader_params).
+        max_samples: Optional cap on the total number of returned samples, applied
+                     after shuffling. None, 0 and negative values mean "no cap".
+
+    Returns:
+        The shuffled samples. An entry that fails to load is logged and left out
+        instead of raising, so the result may be empty.
+    """
+    if not dataset_paths:
+        logger.error("No dataset paths provided")
+        return []
+
+    all_samples: List[Dict] = []
+    for path in dataset_paths:
+        try:
+            all_samples.extend(_load_single_dataset(path))
+        except Exception as e:
+            # Log with traceback and keep going: the remaining paths are still loaded.
+            logger.error("Failed to load dataset from %s: %s", path, e, exc_info=True)
+
+    if not all_samples:
+        logger.warning("No usable samples found in %d dataset path(s)", len(dataset_paths))
+
+    # A dedicated Random instance makes the order depend only on `seed`.
+    random.Random(seed).shuffle(all_samples)
+    logger.info("Shuffled %d samples (seed=%d)", len(all_samples), seed)
+
+    # Require a positive cap: a negative value would slice from the end and
+    # silently drop tail samples instead of limiting the total.
+    if max_samples is not None and 0 < max_samples < len(all_samples):
+        all_samples = all_samples[:max_samples]
+        logger.info("Trimmed to max_samples=%d", max_samples)
+
+    logger.info("Returning %d samples total", len(all_samples))
+    return all_samples
diff --git a/pipelinerl/domains/swe/repair.py b/pipelinerl/domains/swe/repair.py
@@ -0,0 +1,116 @@
+import logging
+from typing import Dict, List
+
+logger = logging.getLogger(__name__)
+
+# System message that build_messages places first in every repair conversation.
+SYSTEM_PROMPT = "You are a helpful coding assistant that analyzes code and fixes bugs."
+
+# User message template, filled in by build_messages through str.format with two
+# fields: {problem_statement} and {file_contents}. It asks for a <think> section
+# followed by a <solution> section of SEARCH/REPLACE edits, which is the layout
+# parse_edits reads back. Because the text goes through str.format, any literal
+# brace added here must be doubled ("{{" / "}}").
+USER_PROMPT_TEMPLATE = (
+    "Analyze the following code to find and fix bugs. Use this format:\n\n"
+    "<think>\n"
+    "[Your analysis process - be as detailed as you want until you're confident in your solution]\n"
+    "</think>\n\n"
+    "<solution>\n"
+    "[Your SEARCH/REPLACE edits using this format:]\n\n"
+    "```\n"
+    "### filename.py\n"
+    "<<<<<<< SEARCH\n"
+    "[exact code to find]\n"
+    "=======\n"
+    "[replacement code]\n"
+    ">>>>>>> REPLACE\n"
+    "```\n"
+    "</solution>\n\n"
+    "IMPORTANT REQUIREMENTS:\n"
+    "- Every SEARCH/REPLACE edit must use the exact format above\n"
+    "- The SEARCH block must contain a contiguous chunk of lines that exist in the source code\n"
+    "- PROPER INDENTATION IS CRITICAL - if you want to add '    print(x)', you must include all those spaces\n"
+    "- Wrap each SEARCH/REPLACE edit in a code block\n"
+    "- Use separate code blocks for multiple edits\n\n"
+    # Worked example of a single edit in the requested format.
+    "Example:\n"
+    "```python\n"
+    "### mathweb/flask/app.py\n"
+    "<<<<<<< SEARCH\n"
+    "from flask import Flask\n"
+    "=======\n"
+    "import math\n"
+    "from flask import Flask\n"
+    ">>>>>>> REPLACE\n"
+    "```\n\n"
+    # Task-specific payload: the issue text, then the candidate source files.
+    "Here is the issue:\n"
+    "--- BEGIN ISSUE ---\n"
+    "{problem_statement}\n"
+    "--- END ISSUE ---\n\n"
+    "Below are the code files that may contain bugs:\n"
+    "{file_contents}"
+)
+
+
+def build_messages(problem_statement: str, file_contents: Dict[str, str]) -> List[dict]:
+    """Assemble the system and user messages for one single-turn repair request.
+
+    Every entry of ``file_contents`` is rendered as a ``### <path>`` heading
+    followed by the file text inside a triple-backtick fence, in the mapping's
+    iteration order. The rendered files and ``problem_statement`` are then
+    substituted into ``USER_PROMPT_TEMPLATE``.
+
+    Returns:
+        A two-element list: the system message, then the user message, each a
+        dict with ``role`` and ``content`` keys.
+    """
+    file_sections = [
+        f"### {name}\n```\n{source}\n```\n\n"
+        for name, source in file_contents.items()
+    ]
+    prompt = USER_PROMPT_TEMPLATE.format(
+        file_contents="".join(file_sections),
+        problem_statement=problem_statement,
+    )
+    system_message = {"role": "system", "content": SYSTEM_PROMPT}
+    user_message = {"role": "user", "content": prompt}
+    return [system_message, user_message]
+
+
+def parse_edits(completion: str) -> List[dict]:
+    """
+    Parse SEARCH/REPLACE blocks from a model completion.
+
+    Each block is a '### filepath' line followed by a
+    <<<<<<< SEARCH / ======= / >>>>>>> REPLACE triple. Triple-backtick code
+    fences around the block are accepted but not required.
+
+    The '=======' separator must stand alone on its line (surrounding
+    whitespace is ignored). Lines that merely contain a run of '=' characters,
+    such as a reStructuredText heading underline or a banner comment inside the
+    SEARCH text, are treated as ordinary content.
+
+    A block is skipped when no '### filepath' line precedes it, or when its
+    separator or closing marker is missing.
+
+    Returns a list of {'file_path', 'search', 'replace'} dicts, in the order
+    the blocks appear in the completion.
+    """
+    edits: List[dict] = []
+    lines = completion.split('\n')
+    n = len(lines)
+    i = 0
+    while i < n:
+        if '<<<<<<< SEARCH' not in lines[i]:
+            i += 1
+            continue
+
+        # Walk back to the most recent '### filepath' line, but don't cross a
+        # previous '>>>>>>> REPLACE' marker (that path belongs to an earlier edit).
+        file_path = None
+        for j in range(i - 1, -1, -1):
+            if '>>>>>>> REPLACE' in lines[j]:
+                break
+            stripped = lines[j].strip()
+            if stripped.startswith('###'):
+                file_path = stripped[3:].strip()
+                break
+        if not file_path:
+            i += 1
+            continue
+
+        search_start = i + 1
+        sep = replace_end = None
+        for k in range(search_start, n):
+            if sep is None:
+                # Whole-line match only: a substring test would mistake
+                # '==========' underlines in the searched code for the separator.
+                if lines[k].strip() == '=======':
+                    sep = k
+            elif '>>>>>>> REPLACE' in lines[k]:
+                replace_end = k
+                break
+
+        if sep is None or replace_end is None:
+            # Unterminated block: move past this SEARCH marker and keep scanning.
+            i += 1
+            continue
+
+        edits.append({
+            'file_path': file_path,
+            'search': '\n'.join(lines[search_start:sep]),
+            'replace': '\n'.join(lines[sep + 1:replace_end]),
+        })
+        # Resume after the closing marker so the consumed block is not rescanned.
+        i = replace_end + 1
+    return edits