From c162b9e4ba2e9a572b024f6a9a4edb1e93cfdb24 Mon Sep 17 00:00:00 2001 From: Siddhartha Prasad Date: Fri, 27 Feb 2026 14:43:45 -0500 Subject: [PATCH] Refine misconception weighting with negative and stale signals --- src/exercisebuilder.py | 78 +++++++++++++++++++++++++++++++---- test/test_regression_model.py | 56 ++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 9 deletions(-) diff --git a/src/exercisebuilder.py b/src/exercisebuilder.py index 3e769d9..1310d3b 100644 --- a/src/exercisebuilder.py +++ b/src/exercisebuilder.py @@ -7,6 +7,8 @@ import random import re import math +import json +import ast import ltltoeng from syntacticmutator import applyRandomMutationNotEquivalentTo @@ -75,8 +77,17 @@ def aggregateLogs(self, bucketsizeinhours=1): bucket += datetime.timedelta(hours=(timestamp.hour % bucketsizeinhours)) # Add the log entry to the corresponding bucket - misconception = log.misconception - buckets[bucket][misconception] += 1 + misconception = getattr(log, 'misconception', '') + if misconception: + buckets[bucket][misconception] += 1 + + # Successful answers are negative evidence for candidate misconceptions + # that were available among distractors but not selected. + # This lets the model reduce misconception weights when students + # repeatedly avoid those misconceptions. + if self._is_correct_answer(log): + for candidate in self._extract_candidate_misconceptions(log): + buckets[bucket][candidate] -= 1.0 # Organize misconceptions by bucket and sort by date for bucket, misconceptions in buckets.items(): @@ -98,6 +109,41 @@ def aggregateLogs(self, bucketsizeinhours=1): return to_return + def _is_correct_answer(self, log): + correct_value = getattr(log, 'correct_answer', False) + return str(correct_value).lower() == 'true' + + def _extract_candidate_misconceptions(self, log): + raw_options = getattr(log, 'question_options', None) + if not raw_options: + return [] + + parsed_options = None + if isinstance(raw_options, list): + parsed_options = raw_options + elif isinstance(raw_options, str): + try: + parsed_options = json.loads(raw_options) + except (TypeError, json.JSONDecodeError): + try: + parsed_options = ast.literal_eval(raw_options) + except (ValueError, SyntaxError): + return [] + + if not isinstance(parsed_options, list): + return [] + + misconceptions = set() + for option in parsed_options: + if not isinstance(option, dict): + continue + + for misconception in option.get('misconceptions', []): + if isinstance(misconception, str) and misconception: + misconceptions.add(misconception) + + return list(misconceptions) + def calculate_misconception_weights(self, concept_history): @@ -142,6 +188,10 @@ def calculate_misconception_weights(self, concept_history): bkt_weight_factor = 0.4 # Frequency weight factor - how much the frequency-based estimate contributes frequency_weight_factor = 0.6 + # Reactivate stale misconceptions by gently increasing priority when + # they have not appeared for a while (prevents starvation). + stale_reactivation_start_hours = 72 + stale_reactivation_max = 0.2 # Pre-calculate decay constant: ln(2) / half-life decay_constant = -math.log(2) / recency_half_life_hours @@ -160,28 +210,35 @@ def calculate_misconception_weights(self, concept_history): recency_weighted_sum = 0 recent_count = 0 total_count = 0 + last_seen_hours_ago = None for date, frequency in entries: hours_ago = (now - date).total_seconds() / 3600 + last_seen_hours_ago = hours_ago # Exponential decay factor (Ebbinghaus forgetting curve inspired) decay_factor = math.exp(decay_constant * hours_ago) - recency_weighted_sum += frequency * decay_factor - total_count += frequency + positive_frequency = max(0, frequency) + recency_weighted_sum += positive_frequency * decay_factor + total_count += positive_frequency # Adapted BKT update: evidence of misconception increases belief # Standard BKT: P(L_n) = P(L_{n-1}) + (1 - P(L_{n-1})) * P(T) # We scale by evidence strength (frequency * recency) - evidence_strength = min(1.0, frequency * decay_factor) + evidence_strength = min(1.0, positive_frequency * decay_factor) p_misconception = p_misconception + (1 - p_misconception) * transition_rate * evidence_strength p_misconception = min(0.95, p_misconception) # Cap to avoid certainty # Track recent occurrences for drilling if hours_ago <= recent_window_hours: - recent_count += frequency + recent_count += positive_frequency # Calculate trend using comparative analysis - trend_score, _ = self._calculate_trend(entries, now) + has_positive_evidence = any(frequency > 0 for _, frequency in entries) + if not has_positive_evidence: + trend_score = -0.25 + else: + trend_score, _ = self._calculate_trend(entries, now) # Combine BKT probability with frequency-based weight base_weight = math.log1p(recency_weighted_sum) / log_scale_divisor @@ -195,12 +252,17 @@ def calculate_misconception_weights(self, concept_history): if recent_count >= drilling_threshold: # Boost proportional to recent frequency, capped at 0.3 drilling_boost = min(0.3, recent_count * 0.05) + + stale_reactivation_boost = 0 + if last_seen_hours_ago is not None and last_seen_hours_ago > stale_reactivation_start_hours: + staleness = (last_seen_hours_ago - stale_reactivation_start_hours) / stale_reactivation_start_hours + stale_reactivation_boost = min(stale_reactivation_max, staleness * 0.05) # Final weight combines BKT probability with frequency-based estimate # bkt_weight_factor controls probabilistic contribution # frequency_weight_factor controls frequency-based contribution weight = (p_misconception * bkt_weight_factor) + (default_weight + base_weight) * frequency_weight_factor - weight += trend_adjustment + drilling_boost + weight += trend_adjustment + drilling_boost + stale_reactivation_boost # Sigmoid squashing to bound output between 0 and 1 weights[concept] = 1 / (1 + math.exp(-(weight - 0.5))) diff --git a/test/test_regression_model.py b/test/test_regression_model.py index 9a5b8d2..87d0569 100644 --- a/test/test_regression_model.py +++ b/test/test_regression_model.py @@ -16,9 +16,11 @@ class MockStudentLog: """Mock class to simulate student response logs.""" - def __init__(self, misconception, timestamp): + def __init__(self, misconception, timestamp, correct_answer=False, question_options=None): self.misconception = misconception self.timestamp = timestamp + self.correct_answer = correct_answer + self.question_options = question_options class TestRegressionModel(unittest.TestCase): @@ -250,6 +252,58 @@ def test_exponential_decay_over_time(self): weights_48h[str(MisconceptionCode.ImplicitG)] ) + def test_correct_answers_reduce_candidate_misconception_weight(self): + """Correct answers should lower weights for misconceptions in candidate distractors.""" + now = datetime.datetime.now() + + options_payload = [ + {"option": "A", "isCorrect": True, "misconceptions": []}, + {"option": "B", "isCorrect": False, "misconceptions": [str(MisconceptionCode.WeakU)]} + ] + + logs = [ + MockStudentLog( + "", + now - datetime.timedelta(hours=2), + correct_answer=True, + question_options=str(options_payload) + ), + MockStudentLog( + "", + now - datetime.timedelta(hours=1), + correct_answer=True, + question_options=str(options_payload) + ), + ] + + builder = ExerciseBuilder(logs) + concept_history = builder.aggregateLogs() + weights = builder.calculate_misconception_weights(concept_history) + + self.assertLess(weights[str(MisconceptionCode.WeakU)], 0.5) + + def test_stale_misconception_gets_reactivation_boost(self): + """Stale misconceptions should receive a small reactivation boost.""" + now = datetime.datetime.now() + + stale_logs = [MockStudentLog( + str(MisconceptionCode.ImplicitG), + now - datetime.timedelta(hours=180) + )] + recent_logs = [MockStudentLog( + str(MisconceptionCode.ImplicitG), + now - datetime.timedelta(hours=24) + )] + + stale_builder = ExerciseBuilder(stale_logs) + recent_builder = ExerciseBuilder(recent_logs) + + stale_weight = stale_builder.calculate_misconception_weights(stale_builder.aggregateLogs())[str(MisconceptionCode.ImplicitG)] + recent_weight = recent_builder.calculate_misconception_weights(recent_builder.aggregateLogs())[str(MisconceptionCode.ImplicitG)] + + self.assertGreater(stale_weight, 0.5) + self.assertGreater(recent_weight, stale_weight) + class TestAggregateLogsIntegration(unittest.TestCase): """Integration tests for log aggregation with the new model."""