diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java index 882608fbb..f4d36ca62 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/AutoBenchYAML.java @@ -194,7 +194,8 @@ public static void main(String[] args) throws IOException { // Write CSV data try (FileWriter writer = new FileWriter(outputFile)) { // Write CSV header - writer.write("dataset,QPS,QPS StdDev,Mean Latency,Recall@10,Index Construction Time,Avg Nodes Visited\n"); + writer.write("dataset,QPS,QPS StdDev,Mean Latency,Recall@10,Index Construction Time,Avg Nodes Visited," + + "max build heap,max query heap,file size\n"); // Write one row per dataset with average metrics for (Map.Entry entry : statsByDataset.entrySet()) { @@ -207,7 +208,10 @@ public static void main(String[] args) throws IOException { writer.write(datasetStats.getAvgLatency() + ","); writer.write(datasetStats.getAvgRecall() + ","); writer.write(datasetStats.getIndexConstruction() + ","); - writer.write(datasetStats.getAvgNodesVisited() + "\n"); + writer.write(datasetStats.getAvgNodesVisited() + ","); + writer.write(datasetStats.getBuildMaxHeap() + ","); + writer.write(datasetStats.getQueryMaxHeap() + ","); + writer.write(datasetStats.getTotalDisk() + "\n"); } } diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java index 2d41835e1..fb563953f 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/Grid.java @@ -898,11 +898,17 @@ public static List runAllAndCollectResults( } if (!missing.isEmpty()) { + // Start peak memory tracking before building + diagnostics.startBuildMemoryTracking(); + // At least one index needs to be built (b/c not in cache or cache is disabled) // We pass the handles map so buildOnDisk knows exactly where to write var newIndexes = buildOnDisk(missing, m, ef, neighborOverflow, addHierarchy, refineFinalGraph, ds, outputDir, compressor, handles, null); indexes.putAll(newIndexes); + + // Stop peak memory tracking after building + diagnostics.stopBuildMemoryTracking(); } ImmutableGraphIndex index = indexes.get(features); @@ -912,6 +918,7 @@ public static List runAllAndCollectResults( diagnostics.printDiskStatistics("Graph Index Build"); var buildSnapshot = diagnostics.getLatestSystemSnapshot(); DiskUsageMonitor.MultiDirectorySnapshot buildDiskSnapshot = diagnostics.getLatestDiskSnapshot(); + var buildPeakMemory = diagnostics.getBuildPeakMemory(); try (ConfiguredSystem cs = new ConfiguredSystem(ds, index, cvArg, features)) { int queryRuns = 2; @@ -954,13 +961,12 @@ public static List runAllAndCollectResults( allMetrics.put("Index Build Time", indexBuildTimes.get(ds.getName())); } - // Add memory metrics if available - if (buildSnapshot != null) { - allMetrics.put("Heap Memory Used (MB)", buildSnapshot.memoryStats.heapUsed / 1024.0 / 1024.0); - allMetrics.put("Heap Memory Max (MB)", buildSnapshot.memoryStats.heapMax / 1024.0 / 1024.0); - allMetrics.put("Off-Heap Direct (MB)", buildSnapshot.memoryStats.directBufferMemory / 1024.0 / 1024.0); - allMetrics.put("Off-Heap Mapped (MB)", buildSnapshot.memoryStats.mappedBufferMemory / 1024.0 / 1024.0); - allMetrics.put("Total Off-Heap (MB)", buildSnapshot.memoryStats.getTotalOffHeapMemory() / 1024.0 / 1024.0); + // Add peak memory metrics from build phase if available + if (buildPeakMemory != null) { + allMetrics.put("Heap Memory Max (MB)", buildPeakMemory.peakHeapUsed / 1024.0 / 1024.0); + allMetrics.put("Off-Heap Direct (MB)", buildPeakMemory.peakDirectBufferMemory / 1024.0 / 1024.0); + allMetrics.put("Off-Heap Mapped (MB)", buildPeakMemory.peakMappedBufferMemory / 1024.0 / 1024.0); + allMetrics.put("Total Off-Heap (MB)", buildPeakMemory.getTotalPeakOffHeapMemory() / 1024.0 / 1024.0); } // Add disk metrics if available diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/QueryTester.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/QueryTester.java index 0c0686578..409f35c64 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/QueryTester.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/QueryTester.java @@ -88,27 +88,33 @@ public List run( } diagnostics.capturePrePhaseSnapshot("Query"); + + // Start peak memory tracking before running queries + diagnostics.startQueryMemoryTracking(); for (var benchmark : benchmarks) { var metrics = benchmark.runBenchmark(cs, topK, rerankK, usePruning, queryRuns); results.addAll(metrics); } + + // Stop peak memory tracking after running queries + diagnostics.stopQueryMemoryTracking(); // Capture memory and disk usage after running queries diagnostics.capturePostPhaseSnapshot("Query"); - // Add memory and disk metrics to results - var systemSnapshot = diagnostics.getLatestSystemSnapshot(); + // Add peak memory metrics to results + var queryPeakMemory = diagnostics.getQueryPeakMemory(); var diskSnapshot = diagnostics.getLatestDiskSnapshot(); - if (systemSnapshot != null) { - // Max heap usage in MB + if (queryPeakMemory != null) { + // Peak heap usage in MB during queries results.add(Metric.of("search.system.max_heap_mb", "Max heap usage (MB)", ".1f", - systemSnapshot.memoryStats.heapUsed / (1024.0 * 1024.0))); + queryPeakMemory.peakHeapUsed / (1024.0 * 1024.0))); - // Max off-heap usage (direct + mapped) in MB + // Peak off-heap usage (direct + mapped) in MB during queries results.add(Metric.of("search.system.max_offheap_mb", "Max offheap usage (MB)", ".1f", - systemSnapshot.memoryStats.getTotalOffHeapMemory() / (1024.0 * 1024.0))); + queryPeakMemory.getTotalPeakOffHeapMemory() / (1024.0 * 1024.0))); } if (diskSnapshot != null) { diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java index 587bd2b02..a38ee9aa7 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/BenchmarkDiagnostics.java @@ -39,6 +39,12 @@ public class BenchmarkDiagnostics implements AutoCloseable { private final List diskSnapshots; private final List timingAnalyses; private boolean diskMonitorStarted = false; + + // Peak memory tracking for build and query phases + private SystemMonitor.PeakMemoryStats buildPeakMemory = null; + private SystemMonitor.PeakMemoryStats queryPeakMemory = null; + private boolean trackingBuildMemory = false; + private boolean trackingQueryMemory = false; public BenchmarkDiagnostics(DiagnosticLevel level) { this.level = level; @@ -100,6 +106,74 @@ public void startMonitoring(String label, Path directory) throws IOException { diskUsageMonitor.addDirectory(label, directory); } } + + /** + * Starts tracking peak memory usage for the build phase. + * Call this before starting index construction. + */ + public void startBuildMemoryTracking() { + if (!trackingBuildMemory) { + systemMonitor.startPeakMemoryTracking(); + trackingBuildMemory = true; + } + } + + /** + * Stops tracking peak memory usage for the build phase and captures the peak values. + * Call this after index construction completes. + */ + public void stopBuildMemoryTracking() { + if (trackingBuildMemory) { + buildPeakMemory = systemMonitor.stopPeakMemoryTracking(); + trackingBuildMemory = false; + + if (level != DiagnosticLevel.NONE) { + System.out.printf("[Build] Peak Memory: %s%n", buildPeakMemory); + } + } + } + + /** + * Starts tracking peak memory usage for the query phase. + * Call this before starting query execution. + */ + public void startQueryMemoryTracking() { + if (!trackingQueryMemory) { + systemMonitor.startPeakMemoryTracking(); + trackingQueryMemory = true; + } + } + + /** + * Stops tracking peak memory usage for the query phase and captures the peak values. + * Call this after query execution completes. + */ + public void stopQueryMemoryTracking() { + if (trackingQueryMemory) { + queryPeakMemory = systemMonitor.stopPeakMemoryTracking(); + trackingQueryMemory = false; + + if (level != DiagnosticLevel.NONE) { + System.out.printf("[Query] Peak Memory: %s%n", queryPeakMemory); + } + } + } + + /** + * Gets the peak memory statistics captured during the build phase. + * Returns null if build memory tracking was not performed. + */ + public SystemMonitor.PeakMemoryStats getBuildPeakMemory() { + return buildPeakMemory; + } + + /** + * Gets the peak memory statistics captured during the query phase. + * Returns null if query memory tracking was not performed. + */ + public SystemMonitor.PeakMemoryStats getQueryPeakMemory() { + return queryPeakMemory; + } /** * Captures system state before starting a benchmark phase diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/SystemMonitor.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/SystemMonitor.java index 2a323fea1..54533009b 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/SystemMonitor.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/benchmarks/diagnostics/SystemMonitor.java @@ -18,10 +18,13 @@ import java.lang.management.*; import java.util.List; +import java.util.concurrent.atomic.AtomicLong; /** * Utility class for monitoring system resources during benchmark execution. * Tracks GC activity, memory usage (on-heap and off-heap), CPU load, and thread statistics. + * + * Supports continuous peak memory tracking via a background sampling thread. */ public class SystemMonitor { @@ -32,6 +35,13 @@ public class SystemMonitor { private final com.sun.management.OperatingSystemMXBean sunOsBean; private final List memoryPoolBeans; private final List bufferPoolBeans; + + // Peak memory tracking + private final AtomicLong peakHeapUsed = new AtomicLong(0); + private final AtomicLong peakDirectBufferMemory = new AtomicLong(0); + private final AtomicLong peakMappedBufferMemory = new AtomicLong(0); + private volatile Thread samplingThread; + private volatile boolean sampling = false; public SystemMonitor() { this.memoryBean = ManagementFactory.getMemoryMXBean(); @@ -42,6 +52,93 @@ public SystemMonitor() { this.memoryPoolBeans = ManagementFactory.getMemoryPoolMXBeans(); this.bufferPoolBeans = ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class); } + + /** + * Starts continuous memory sampling in a background thread. + * Samples memory usage every 50ms and tracks peak values. + */ + public void startPeakMemoryTracking() { + if (sampling) { + return; // Already sampling + } + + // Reset peak values + peakHeapUsed.set(0); + peakDirectBufferMemory.set(0); + peakMappedBufferMemory.set(0); + + sampling = true; + samplingThread = new Thread(() -> { + while (sampling) { + try { + MemoryStats current = captureMemoryStats(); + + // Update peak values atomically + updatePeak(peakHeapUsed, current.heapUsed); + updatePeak(peakDirectBufferMemory, current.directBufferMemory); + updatePeak(peakMappedBufferMemory, current.mappedBufferMemory); + + Thread.sleep(50); // Sample every 50ms + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + }, "SystemMonitor-PeakMemorySampler"); + samplingThread.setDaemon(true); + samplingThread.start(); + } + + /** + * Stops continuous memory sampling and returns the peak values observed. + */ + public PeakMemoryStats stopPeakMemoryTracking() { + sampling = false; + if (samplingThread != null) { + try { + samplingThread.join(1000); // Wait up to 1 second + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + samplingThread = null; + } + + return new PeakMemoryStats( + peakHeapUsed.get(), + peakDirectBufferMemory.get(), + peakMappedBufferMemory.get() + ); + } + + /** + * Gets current peak memory values without stopping tracking. + */ + public PeakMemoryStats getCurrentPeakMemory() { + return new PeakMemoryStats( + peakHeapUsed.get(), + peakDirectBufferMemory.get(), + peakMappedBufferMemory.get() + ); + } + + /** + * Resets peak memory tracking values. + */ + public void resetPeakMemory() { + peakHeapUsed.set(0); + peakDirectBufferMemory.set(0); + peakMappedBufferMemory.set(0); + } + + private void updatePeak(AtomicLong peak, long current) { + long currentPeak; + do { + currentPeak = peak.get(); + if (current <= currentPeak) { + break; + } + } while (!peak.compareAndSet(currentPeak, current)); + } /** * Captures current system state snapshot @@ -167,6 +264,33 @@ public void logDetailedGCStats(String phase) { gcBean.getName(), gcBean.getCollectionCount(), gcBean.getCollectionTime()); } } + /** + * Container for peak memory statistics captured during continuous monitoring. + */ + public static class PeakMemoryStats { + public final long peakHeapUsed; + public final long peakDirectBufferMemory; + public final long peakMappedBufferMemory; + + public PeakMemoryStats(long peakHeapUsed, long peakDirectBufferMemory, long peakMappedBufferMemory) { + this.peakHeapUsed = peakHeapUsed; + this.peakDirectBufferMemory = peakDirectBufferMemory; + this.peakMappedBufferMemory = peakMappedBufferMemory; + } + + public long getTotalPeakOffHeapMemory() { + return peakDirectBufferMemory + peakMappedBufferMemory; + } + + @Override + public String toString() { + return String.format("PeakMemory[heap=%d MB, direct=%d MB, mapped=%d MB]", + peakHeapUsed / 1024 / 1024, + peakDirectBufferMemory / 1024 / 1024, + peakMappedBufferMemory / 1024 / 1024); + } + } + // Inner classes for data structures public static class SystemSnapshot { diff --git a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java index 60b9a80f1..8bbe332ba 100644 --- a/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java +++ b/jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizer.java @@ -17,8 +17,10 @@ import io.github.jbellis.jvector.example.BenchResult; +import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; /** * Utility class for summarizing benchmark results by calculating average metrics @@ -37,8 +39,25 @@ public static class SummaryStats { private final int totalConfigurations; private final double qpsStdDev; private final double avgNodesVisited; + // Disk and memory stats + private final double buildMaxOffheap; + private final double buildMaxHeap; + private final double queryMaxOffheap; + private final double queryMaxHeap; + private final double totalDisk; - public SummaryStats(double avgRecall, double avgQps, double avgLatency, double indexConstruction, int totalConfigurations, double qpsStdDev, double avgNodesVisited) { + public SummaryStats(double avgRecall, + double avgQps, + double avgLatency, + double indexConstruction, + int totalConfigurations, + double qpsStdDev, + double avgNodesVisited, + double buildMaxOffheap, + double buildMaxHeap, + double queryMaxOffheap, + double queryMaxHeap, + double totalDisk) { this.avgRecall = avgRecall; this.avgQps = avgQps; this.avgLatency = avgLatency; @@ -46,6 +65,11 @@ public SummaryStats(double avgRecall, double avgQps, double avgLatency, double i this.totalConfigurations = totalConfigurations; this.qpsStdDev = qpsStdDev; this.avgNodesVisited = avgNodesVisited; + this.buildMaxOffheap = buildMaxOffheap; + this.buildMaxHeap = buildMaxHeap; + this.queryMaxOffheap = queryMaxOffheap; + this.queryMaxHeap = queryMaxHeap; + this.totalDisk = totalDisk; } public double getAvgRecall() { @@ -70,6 +94,16 @@ public int getTotalConfigurations() { public double getAvgNodesVisited() { return avgNodesVisited; } + public double getBuildMaxOffheap() { return buildMaxOffheap; } + + public double getBuildMaxHeap() { return buildMaxHeap; } + + public double getQueryMaxOffheap() { return queryMaxOffheap; } + + public double getQueryMaxHeap() { return queryMaxHeap; } + + public double getTotalDisk() { return totalDisk; } + @Override public String toString() { return String.format( @@ -90,7 +124,7 @@ public String toString() { */ public static SummaryStats summarize(List results) { if (results == null || results.isEmpty()) { - return new SummaryStats(0, 0, 0, 0, 0, 0, 0); + return new SummaryStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } double totalRecall = 0; @@ -99,6 +133,11 @@ public static SummaryStats summarize(List results) { double indexConstruction = 0; double totalQpsStdDev = 0; double totalNodesVisited = 0; + double buildMaxOffheap = 0; + double buildMaxHeap = 0; + double queryMaxOffheap = 0; + double queryMaxHeap = 0; + double totalDisk = 0; int recallCount = 0; int qpsCount = 0; @@ -137,16 +176,30 @@ public static SummaryStats summarize(List results) { latencyCount++; } - indexConstruction = extractIndexConstructionMetric(result.metrics); - // Extract nodes visited metric (format is "Avg Visited") Double nodesVisited = extractNodesVisitedMetric(result.metrics); if (nodesVisited != null) { totalNodesVisited += nodesVisited; nodesVisitedCount++; } + + } + + /* + These metrics should only occur once per dataset. If there are more than one result sets being passed it's + a summary where averaging the results of these categories would be nonsensical. + */ + if (results.size() == 1) { + var metrics = results.get(0).metrics; + indexConstruction = extractIndexConstructionMetric(metrics); + buildMaxOffheap = extractDouble("Total Off-Heap (MB)", metrics); + buildMaxHeap = extractDouble("Heap Memory Max (MB)", metrics); + queryMaxOffheap = extractDouble("Max offheap usage (MB)", metrics); + queryMaxHeap = extractDouble("Max heap usage (MB)", metrics); + totalDisk = extractDouble("Disk Usage (MB)", metrics); } + // Calculate averages, handling cases where some metrics might not be present double avgRecall = recallCount > 0 ? totalRecall / recallCount : 0; double avgQps = qpsCount > 0 ? totalQps / qpsCount : 0; @@ -157,7 +210,17 @@ public static SummaryStats summarize(List results) { // Count total valid configurations as the maximum count of any metric int totalConfigurations = Math.max(Math.max(recallCount, qpsCount), latencyCount); - return new SummaryStats(avgRecall, avgQps, avgLatency, indexConstruction, totalConfigurations, avgQpsStdDev, avgNodesVisited); + return new SummaryStats(avgRecall, avgQps, avgLatency, indexConstruction, totalConfigurations, avgQpsStdDev, avgNodesVisited, + buildMaxOffheap, buildMaxHeap, queryMaxOffheap, queryMaxHeap, totalDisk); + } + + private static Double extractDouble(String metric, Map metrics) { + for (Map.Entry entry : metrics.entrySet()) { + if (entry.getKey().startsWith(metric)) { + return convertToDouble(entry.getValue()); + } + } + return 0.0; } private static Double extractIndexConstructionMetric(Map metrics) { diff --git a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java index 668f573ac..d0b77113a 100644 --- a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java +++ b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/BenchmarkSummarizerTest.java @@ -115,7 +115,7 @@ public void testSummarizeWithNullList() { @Test public void testSummaryStatsToString() { // Create a SummaryStats instance - SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100) + SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100, 0, 0, 0, 0, 0) ; // Verify toString output String expected = String.format( diff --git a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java index c3d698bab..40eb4de68 100644 --- a/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java +++ b/jvector-examples/src/test/java/io/github/jbellis/jvector/example/util/SummarizerTest.java @@ -121,7 +121,7 @@ private static void testSummaryStatsToString() { System.out.println("\nTest: SummaryStats toString method"); // Create a SummaryStats instance - SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100); + SummaryStats stats = new SummaryStats(0.85, 1200.0, 5.2, 1000000, 4, 0.2, 100, 0, 0, 0, 0, 0); // Verify toString output String expected = String.format( diff --git a/visualize_benchmarks.py b/visualize_benchmarks.py index 903c3448a..81d715d4a 100644 --- a/visualize_benchmarks.py +++ b/visualize_benchmarks.py @@ -24,7 +24,15 @@ from typing import Dict, List, Any, Tuple, Optional import matplotlib.pyplot as plt import numpy as np -from matplotlib.ticker import MaxNLocator +from matplotlib.ticker import MaxNLocator, FuncFormatter + +# Try to import mplcursors for interactive tooltips +try: + import mplcursors + MPLCURSORS_AVAILABLE = True +except ImportError: + MPLCURSORS_AVAILABLE = False + print("Note: Install mplcursors for interactive tooltips: pip install mplcursors") # Define metrics where higher values are better and lower values are better @@ -153,12 +161,18 @@ def generate_plots(benchmark_data: BenchmarkData, output_dir: str): # Create a plot for each metric for metric in benchmark_data.metrics: - plt.figure(figsize=(10, 6)) + fig, ax = plt.subplots(figsize=(12, 7)) + + # Store line objects for interactive tooltips + lines = [] + all_values = [] # Plot one line per dataset for dataset in benchmark_data.datasets: releases, values = benchmark_data.get_metric_data_for_dataset(metric, dataset) if releases and values: + all_values.extend(values) + # For QPS, try to add error bars using corresponding stddev column if metric == "QPS": std_releases, std_values = benchmark_data.get_metric_data_for_dataset("QPS StdDev", dataset) @@ -166,23 +180,62 @@ def generate_plots(benchmark_data: BenchmarkData, output_dir: str): # Align stddev values to the QPS releases order std_map = {r: v for r, v in zip(std_releases, std_values)} yerr = [std_map.get(r, 0.0) for r in releases] - plt.errorbar(releases, values, yerr=yerr, marker='o', capsize=4, label=dataset) + line = ax.errorbar(releases, values, yerr=yerr, marker='o', capsize=4, label=dataset) + lines.append(line) else: - plt.plot(releases, values, marker='o', label=dataset) + line, = ax.plot(releases, values, marker='o', label=dataset) + lines.append(line) else: - plt.plot(releases, values, marker='o', label=dataset) - - plt.title(f"{metric} Over Time") - plt.xlabel("Release") - plt.ylabel(metric) + line, = ax.plot(releases, values, marker='o', label=dataset) + lines.append(line) + + # Annotate each data point with its value (2 decimal places) + for i, (release, value) in enumerate(zip(releases, values)): + # Determine text position (alternate left/right to avoid overlap) + if i % 2 == 0: + ha = 'right' + offset_x = -8 + else: + ha = 'left' + offset_x = 8 + + ax.annotate(f'{value:.2f}', + xy=(release, value), + xytext=(offset_x, 5), + textcoords='offset points', + fontsize=8, + ha=ha, + va='bottom', + bbox=dict(boxstyle='round,pad=0.3', facecolor='white', + edgecolor='gray', alpha=0.7)) + + # Format Y-axis to show 2 decimal places + ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{y:.2f}')) + + # Add interactive tooltips if mplcursors is available + if MPLCURSORS_AVAILABLE and lines: + cursor = mplcursors.cursor(lines, hover=True) + @cursor.connect("add") + def on_add(sel): + # Get the dataset name from the line label + line = sel.artist + label = line.get_label() + x_val = sel.target[0] + y_val = sel.target[1] + sel.annotation.set_text(f'{label}\nRelease: {x_val}\nValue: {y_val:.2f}') + sel.annotation.get_bbox_patch().set(facecolor='yellow', alpha=0.9) + + ax.set_title(f"{metric} Over Time") + ax.set_xlabel("Release") + ax.set_ylabel(metric) plt.xticks(rotation=45) - plt.grid(True, linestyle='--', alpha=0.7) - plt.legend() + ax.grid(True, linestyle='--', alpha=0.7) + ax.legend() plt.tight_layout() # Save the plot safe_metric_name = metric.replace('@', '_at_').replace(' ', '_') - plt.savefig(os.path.join(output_dir, f"{safe_metric_name}.png")) + plt.savefig(os.path.join(output_dir, f"{safe_metric_name}.png"), dpi=150) plt.close() # Create a combined plot with aggregated values for comparison