Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 70 additions & 8 deletions src/exercisebuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import random
import re
import math
import json
import ast
import ltltoeng
from syntacticmutator import applyRandomMutationNotEquivalentTo

Expand Down Expand Up @@ -75,8 +77,17 @@ def aggregateLogs(self, bucketsizeinhours=1):
bucket += datetime.timedelta(hours=(timestamp.hour % bucketsizeinhours))

# Add the log entry to the corresponding bucket
misconception = log.misconception
buckets[bucket][misconception] += 1
misconception = getattr(log, 'misconception', '')
if misconception:
buckets[bucket][misconception] += 1

# Successful answers are negative evidence for candidate misconceptions
# that were available among distractors but not selected.
# This lets the model reduce misconception weights when students
# repeatedly avoid those misconceptions.
if self._is_correct_answer(log):
for candidate in self._extract_candidate_misconceptions(log):
buckets[bucket][candidate] -= 1.0

# Organize misconceptions by bucket and sort by date
for bucket, misconceptions in buckets.items():
Expand All @@ -98,6 +109,41 @@ def aggregateLogs(self, bucketsizeinhours=1):

return to_return

def _is_correct_answer(self, log):
correct_value = getattr(log, 'correct_answer', False)
return str(correct_value).lower() == 'true'

def _extract_candidate_misconceptions(self, log):
raw_options = getattr(log, 'question_options', None)
if not raw_options:
return []

parsed_options = None
if isinstance(raw_options, list):
parsed_options = raw_options
elif isinstance(raw_options, str):
try:
parsed_options = json.loads(raw_options)
except (TypeError, json.JSONDecodeError):
try:
parsed_options = ast.literal_eval(raw_options)
except (ValueError, SyntaxError):
return []

if not isinstance(parsed_options, list):
return []

misconceptions = set()
for option in parsed_options:
if not isinstance(option, dict):
continue

for misconception in option.get('misconceptions', []):
if isinstance(misconception, str) and misconception:
misconceptions.add(misconception)

return list(misconceptions)



def calculate_misconception_weights(self, concept_history):
Expand Down Expand Up @@ -142,6 +188,10 @@ def calculate_misconception_weights(self, concept_history):
bkt_weight_factor = 0.4
# Frequency weight factor - how much the frequency-based estimate contributes
frequency_weight_factor = 0.6
# Reactivate stale misconceptions by gently increasing priority when
# they have not appeared for a while (prevents starvation).
stale_reactivation_start_hours = 72
stale_reactivation_max = 0.2

# Pre-calculate decay constant: ln(2) / half-life
decay_constant = -math.log(2) / recency_half_life_hours
Expand All @@ -160,28 +210,35 @@ def calculate_misconception_weights(self, concept_history):
recency_weighted_sum = 0
recent_count = 0
total_count = 0
last_seen_hours_ago = None

for date, frequency in entries:
hours_ago = (now - date).total_seconds() / 3600
last_seen_hours_ago = hours_ago

# Exponential decay factor (Ebbinghaus forgetting curve inspired)
decay_factor = math.exp(decay_constant * hours_ago)
recency_weighted_sum += frequency * decay_factor
total_count += frequency
positive_frequency = max(0, frequency)
recency_weighted_sum += positive_frequency * decay_factor
total_count += positive_frequency

# Adapted BKT update: evidence of misconception increases belief
# Standard BKT: P(L_n) = P(L_{n-1}) + (1 - P(L_{n-1})) * P(T)
# We scale by evidence strength (frequency * recency)
evidence_strength = min(1.0, frequency * decay_factor)
evidence_strength = min(1.0, positive_frequency * decay_factor)
p_misconception = p_misconception + (1 - p_misconception) * transition_rate * evidence_strength
p_misconception = min(0.95, p_misconception) # Cap to avoid certainty

# Track recent occurrences for drilling
if hours_ago <= recent_window_hours:
recent_count += frequency
recent_count += positive_frequency

# Calculate trend using comparative analysis
trend_score, _ = self._calculate_trend(entries, now)
has_positive_evidence = any(frequency > 0 for _, frequency in entries)
if not has_positive_evidence:
trend_score = -0.25
else:
trend_score, _ = self._calculate_trend(entries, now)

# Combine BKT probability with frequency-based weight
base_weight = math.log1p(recency_weighted_sum) / log_scale_divisor
Expand All @@ -195,12 +252,17 @@ def calculate_misconception_weights(self, concept_history):
if recent_count >= drilling_threshold:
# Boost proportional to recent frequency, capped at 0.3
drilling_boost = min(0.3, recent_count * 0.05)

stale_reactivation_boost = 0
if last_seen_hours_ago is not None and last_seen_hours_ago > stale_reactivation_start_hours:
staleness = (last_seen_hours_ago - stale_reactivation_start_hours) / stale_reactivation_start_hours
stale_reactivation_boost = min(stale_reactivation_max, staleness * 0.05)

# Final weight combines BKT probability with frequency-based estimate
# bkt_weight_factor controls probabilistic contribution
# frequency_weight_factor controls frequency-based contribution
weight = (p_misconception * bkt_weight_factor) + (default_weight + base_weight) * frequency_weight_factor
weight += trend_adjustment + drilling_boost
weight += trend_adjustment + drilling_boost + stale_reactivation_boost

# Sigmoid squashing to bound output between 0 and 1
weights[concept] = 1 / (1 + math.exp(-(weight - 0.5)))
Expand Down
56 changes: 55 additions & 1 deletion test/test_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

class MockStudentLog:
    """Mock class to simulate student response logs."""

    def __init__(self, misconception, timestamp, correct_answer=False, question_options=None):
        # Misconception code string ("" when none is recorded for the answer).
        self.misconception = misconception
        # When the response occurred (a datetime in the tests).
        self.timestamp = timestamp
        # Whether the student answered correctly; bool or "true"/"false" string.
        self.correct_answer = correct_answer
        # Answer options: a list of option dicts or a serialized string of one.
        self.question_options = question_options


class TestRegressionModel(unittest.TestCase):
Expand Down Expand Up @@ -250,6 +252,58 @@ def test_exponential_decay_over_time(self):
weights_48h[str(MisconceptionCode.ImplicitG)]
)

def test_correct_answers_reduce_candidate_misconception_weight(self):
    """Correct answers should lower weights for misconceptions in candidate distractors."""
    now = datetime.datetime.now()

    distractor_options = [
        {"option": "A", "isCorrect": True, "misconceptions": []},
        {"option": "B", "isCorrect": False, "misconceptions": [str(MisconceptionCode.WeakU)]}
    ]
    serialized_options = str(distractor_options)

    # Two correct answers (2h and 1h ago) against the same question payload.
    logs = []
    for hours_back in (2, 1):
        logs.append(MockStudentLog(
            "",
            now - datetime.timedelta(hours=hours_back),
            correct_answer=True,
            question_options=serialized_options
        ))

    builder = ExerciseBuilder(logs)
    weights = builder.calculate_misconception_weights(builder.aggregateLogs())

    self.assertLess(weights[str(MisconceptionCode.WeakU)], 0.5)

def test_stale_misconception_gets_reactivation_boost(self):
    """Stale misconceptions should receive a small reactivation boost."""
    now = datetime.datetime.now()
    code = str(MisconceptionCode.ImplicitG)

    def weight_for(hours_ago):
        # Build a one-log history and score it end to end.
        builder = ExerciseBuilder([
            MockStudentLog(code, now - datetime.timedelta(hours=hours_ago))
        ])
        return builder.calculate_misconception_weights(builder.aggregateLogs())[code]

    stale_weight = weight_for(180)
    recent_weight = weight_for(24)

    self.assertGreater(stale_weight, 0.5)
    self.assertGreater(recent_weight, stale_weight)


class TestAggregateLogsIntegration(unittest.TestCase):
"""Integration tests for log aggregation with the new model."""
Expand Down