From 5503acb0ad07312706b682407ddbc847bbbc7834 Mon Sep 17 00:00:00 2001 From: David First Date: Tue, 23 Jun 2026 15:11:09 -0400 Subject: [PATCH 1/5] perf(workspace): batch deps-cache invalidation into one workspace fs scan The deps fs-cache freshness check ran a recursive globby per component that followed each component's node_modules symlink into the shared workspace node_modules (226k of 230k scanned entries), 313x per command. Replace it with a single node_modules-ignoring workspace scan, memoized as a command-scoped mtime index on FsCache and invalidated via the workspace's clear-cache hooks (so watch stays correct), with a per-component fallback. Cuts warm `bit status` fs syscalls ~40% (74.3k -> 44.8k); read traffic unchanged. Warm-wall-neutral on fast SSD (I/O-wait overlapping CPU), a real win on cold/CI/networked filesystems. --- .../dependencies-loader.ts | 29 ++++++- scopes/toolbox/fs/last-modified/index.ts | 1 + .../toolbox/fs/last-modified/last-modified.ts | 64 +++++++++++++++ scopes/workspace/modules/fs-cache/fs-cache.ts | 37 +++++++++ .../workspace/component-loading-redesign.md | 78 +++++++++++++------ scopes/workspace/workspace/workspace.ts | 3 + 6 files changed, 184 insertions(+), 28 deletions(-) diff --git a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts index 31a11ee5247d..56b85203304e 100644 --- a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts +++ b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts @@ -4,6 +4,7 @@ import { IssuesClasses } from '@teambit/component-issues'; import { getLastModifiedComponentTimestampMs, getLastModifiedPathsTimestampMs, + buildDirsLastModifiedIndex, } from '@teambit/toolbox.fs.last-modified'; import { ExtensionDataEntry } from '@teambit/legacy.extension-data'; import type { DependencyLoaderOpts, ConsumerComponent as Component } from 
'@teambit/legacy.consumer-component'; @@ -119,10 +120,7 @@ export class DependenciesLoader { // to invalidate the cache in such a case. return null; } - const filesPaths = this.component.files.map((f) => f.path); - const componentConfigPath = path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME); - filesPaths.push(componentConfigPath); - const lastModifiedComponent = await getLastModifiedComponentTimestampMs(rootDir, filesPaths); + const lastModifiedComponent = await this.getComponentLastModified(workspace, rootDir); const wasModifiedAfterCache = lastModifiedComponent > cacheData.timestamp; if (wasModifiedAfterCache) { @@ -146,6 +144,29 @@ export class DependenciesLoader { return DependenciesData.deserialize(cacheData.data); } + /** + * last-modified time of this component's files, used to decide whether the cached deps are stale. + * reads from a workspace-wide index built with a single filesystem scan and shared across all + * components in the command (instead of a recursive per-component scan — the hot path on large + * workspaces). falls back to a per-component scan when the entry isn't in the index, e.g. after a + * single-component cache clear (watch) or for a component added since the scan. 
+ */ + private async getComponentLastModified(workspace: Workspace, rootDir: string): Promise<number> { + const index = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(() => + buildDirsLastModifiedIndex( + workspace.path, + workspace.consumer.bitMap.getAllComponents().map((componentMap) => componentMap.getComponentDir()) + ) + ); + const fromIndex = index.get(rootDir); + if (fromIndex !== undefined) return fromIndex; + const filesPaths = this.component.files.map((file) => file.path); + filesPaths.push(path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME)); + const lastModified = await getLastModifiedComponentTimestampMs(rootDir, filesPaths); + index.set(rootDir, lastModified); + return lastModified; + } + private shouldSaveInCache(dependenciesData: DependenciesData, storeInFsCache = true) { if (!storeInFsCache) return false; if (!dependenciesData.issues) return true; diff --git a/scopes/toolbox/fs/last-modified/index.ts b/scopes/toolbox/fs/last-modified/index.ts index 5d9d3e9f80f6..d559a0875783 100644 --- a/scopes/toolbox/fs/last-modified/index.ts +++ b/scopes/toolbox/fs/last-modified/index.ts @@ -2,4 +2,5 @@ export { getLastModifiedPathsTimestampMs, getPathStatIfExist, getLastModifiedComponentTimestampMs, + buildDirsLastModifiedIndex, } from './last-modified'; diff --git a/scopes/toolbox/fs/last-modified/last-modified.ts b/scopes/toolbox/fs/last-modified/last-modified.ts index 13a72d81fbea..bdb805ca84e2 100644 --- a/scopes/toolbox/fs/last-modified/last-modified.ts +++ b/scopes/toolbox/fs/last-modified/last-modified.ts @@ -1,8 +1,11 @@ +import nodePath from 'path'; import globby from 'globby'; import type { Stats } from 'fs-extra'; import fs from 'fs-extra'; import { compact } from 'lodash'; +type GlobbyStatEntry = { path: string; stats?: { mtimeMs: number } }; + /** * check recursively all the sub-directories as well */ @@ -37,3 +40,64 @@ export async function getPathStatIfExist(path: string): Promise { throw err; } } + +/** + * find the 
directory in `dirSet` that owns `relPath` — the deepest dir that is a path-prefix of it. + */ +function ownerDir(relPath: string, dirSet: Set<string>): string | undefined { + const parts = relPath.split('/'); + for (let i = parts.length - 1; i > 0; i -= 1) { + const candidate = parts.slice(0, i).join('/'); + if (dirSet.has(candidate)) return candidate; + } + return undefined; +} + +/** + * build a last-modified index for many directories with a *single* filesystem scan, keyed by each + * input dir (relative to `cwd`). the value is the max mtime over every file and nested directory + * under that dir, plus the dir's own mtime. equivalent to calling `getLastModifiedComponentTimestampMs` + * per dir, but replaces N recursive `globby` scans with one — the hot path on large workspaces. + * + * the per-dir value catches content edits (file mtime), additions/deletions in nested dirs (the + * nested dir's own mtime), and deletions directly under the dir (the dir's own mtime). + * + * `node_modules` is ignored by default: component dirs symlink it to the shared workspace + * `node_modules`, so following it makes the scan ~60x larger and slower. Its contents are also + * irrelevant to source-derived caches (e.g. auto-detected dependencies come from source imports; + * install flows clear those caches explicitly). + */ +export async function buildDirsLastModifiedIndex( + cwd: string, + dirs: string[], + ignore: string[] = ['**/node_modules/**'] +): Promise<Map<string, number>> { + const uniqDirs = [...new Set(dirs.filter(Boolean))]; + const dirSet = new Set(uniqDirs); + const index = new Map(); + const bump = (dir: string, mtimeMs: number) => { + const current = index.get(dir); + if (current === undefined || mtimeMs > current) index.set(dir, mtimeMs); + }; + // one recursive scan of all dirs, returning files + nested dirs together with their stats. 
+ const entries = (await globby(uniqDirs, { + cwd, + stats: true, + onlyFiles: false, + dot: true, + ignore, + })) as unknown as GlobbyStatEntry[]; + for (const entry of entries) { + const owner = ownerDir(entry.path, dirSet); + if (owner) bump(owner, entry.stats?.mtimeMs ?? 0); + } + // globby returns the *contents* of each dir, not the dir itself; stat the dirs so a deletion + // directly under one (which only bumps that dir's own mtime) is still reflected. + await Promise.all( + uniqDirs.map(async (dir) => { + const stat = await getPathStatIfExist(nodePath.join(cwd, dir)); + if (stat) bump(dir, stat.mtimeMs); + }) + ); + return index; +} diff --git a/scopes/workspace/modules/fs-cache/fs-cache.ts b/scopes/workspace/modules/fs-cache/fs-cache.ts index d36ca11a047c..b2585bbce96f 100644 --- a/scopes/workspace/modules/fs-cache/fs-cache.ts +++ b/scopes/workspace/modules/fs-cache/fs-cache.ts @@ -14,11 +14,48 @@ const DEPS = 'deps'; export class FsCache { readonly basePath: PathOsBasedAbsolute; protected isNoFsCacheFeatureEnabled: boolean; + // command-scoped index of component rootDir -> last-modified mtimeMs, used to invalidate the + // dependencies fs-cache with a single workspace scan instead of a per-component one. invalidated + // by the workspace's clearAllComponentsCache / clearComponentCache (e.g. on watch file changes). + private componentsMtimeIndex?: Map<string, number>; + private componentsMtimeIndexBuilding?: Promise<Map<string, number>>; + private componentsMtimeIndexGen = 0; constructor(private scopePath: string) { this.basePath = path.join(this.scopePath, WORKSPACE_CACHE, COMPONENTS_CACHE); this.isNoFsCacheFeatureEnabled = isFeatureEnabled(NO_FS_CACHE_FEATURE); } + /** + * return the shared components last-modified index, building it once via `build` and memoizing it + * for the lifetime of this cache (a command, or until invalidated). concurrent first-callers share + * a single build. 
+ */ + async getOrBuildComponentsMtimeIndex(build: () => Promise<Map<string, number>>): Promise<Map<string, number>> { + if (this.componentsMtimeIndex) return this.componentsMtimeIndex; + if (!this.componentsMtimeIndexBuilding) { + const gen = this.componentsMtimeIndexGen; + this.componentsMtimeIndexBuilding = build().then((index) => { + // if the index was cleared while building, don't cache this now-stale result as canonical. + if (gen === this.componentsMtimeIndexGen) this.componentsMtimeIndex = index; + this.componentsMtimeIndexBuilding = undefined; + return index; + }); + } + return this.componentsMtimeIndexBuilding; + } + + /** drop the whole index (e.g. on a full workspace cache clear). */ + clearComponentsMtimeIndex() { + this.componentsMtimeIndex = undefined; + this.componentsMtimeIndexBuilding = undefined; + this.componentsMtimeIndexGen += 1; + } + + /** drop a single component's entry so its next load recomputes it (e.g. on a watch file change). */ + deleteComponentMtimeIndexEntry(rootDir: string) { + this.componentsMtimeIndex?.delete(rootDir); + } + async getDocsFromCache(filePath: string): Promise<{ timestamp: number; data: string } | null> { return this.getStringDataFromCache(filePath, DOCS); } diff --git a/scopes/workspace/workspace/component-loading-redesign.md b/scopes/workspace/workspace/component-loading-redesign.md index 4391f3347118..614eb69a4a78 100644 --- a/scopes/workspace/workspace/component-loading-redesign.md +++ b/scopes/workspace/workspace/component-loading-redesign.md @@ -1,7 +1,7 @@ # Component Loading Redesign **Status:** Phase 1 shipped; Phase 2 in progress -**Last updated:** 2026-06-15 (code references are against `master` @ `59855b104`; line numbers will drift) +**Last updated:** 2026-06-23 (code references are against `master`; line numbers will drift) This document is the source of truth for a multi-phase effort to simplify Bit's component-loading mechanism: fewer caches, a staged (lazy) loading pipeline, a single env/aspect load planner, and a @@ -204,11 +204,15 @@ 
earlier ones teach us). ### Phase 2 — Quick perf wins on existing seams - [x] Benchmark harness committed + baseline recorded (see §4) — **gate for the rest of the phase** -- [ ] Lazy file contents in `ModelComponent.toConsumerComponent` +- [x] Batch the deps-cache invalidation scan: one `node_modules`-ignoring workspace scan shared via a + command-scoped mtime index, replacing the per-component recursive `globby`. Cuts warm `bit + status` fs syscalls ~40% (74.3k→44.8k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). +- [ ] `on-load` slot-laziness (`loadDocs: false, loadCompositions: false` for non-UI flows) — the + largest CPU-bound stage (9.2s) and the next **warm-wall** target +- [ ] Lazy file contents in `ModelComponent.toConsumerComponent` (helps `graph`, not `status`) - [ ] `bit deps usage`: ids + stored deps instead of full load - [ ] IDE metadata endpoint (`api-for-ide.ts`): S0-S2-level data only - [ ] `bit remove` / forking: drop full-component loads where only ids/paths are used -- [ ] Default `loadDocs: false, loadCompositions: false` for non-UI flows ### Phase 3 — Cache consolidation @@ -270,13 +274,13 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency **`bit status` (warm, ~13s wall):** -| stage | aggregate self-time | ~wall | note | -| ----------------------------------------------------------- | ------------------- | ----- | --------------------------------------------- | -| `legacy-load-deps` | 43s | ~7s | dependency-object materialization (see below) | -| `on-load` (slot handlers: docs, compositions, schema, pkg…) | 10s | ~1.7s | trimmable for non-UI flows | -| `dependency-resolution` (Harmony resolver) | 7.7s | ~1.3s | | -| `execute-load-slot` (own) | 5.8s | ~1s | | -| `consumer-fs-load` (file content reads) | negligible | — | not a `status` cost | +| stage | aggregate self-time | ~wall | note | +| ----------------------------------------------------------- | ------------------- | ----- | 
----------------------------------------------------------- | +| `legacy-load-deps` | 43s | ~7s | filesystem traversal in deps-cache invalidation (see below) | +| `on-load` (slot handlers: docs, compositions, schema, pkg…) | 10s | ~1.7s | trimmable for non-UI flows | +| `dependency-resolution` (Harmony resolver) | 7.7s | ~1.3s | | +| `execute-load-slot` (own) | 5.8s | ~1s | | +| `consumer-fs-load` (file content reads) | negligible | — | not a `status` cost | **`bit graph --json` (warm, ~20s wall; ~7.5s of it is loading):** @@ -285,23 +289,35 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency | `consumer-fs-load` (file content reads) | 5.8s | dominant load cost for graph | | `legacy-load-deps` | 1.2s | graph uses a lighter load path | -**Key conclusions (validated, not hypothesized):** +**Key conclusions (validated by measurement; these _supersede_ the earlier "object materialization" +hypothesis, which deeper sub-step instrumentation disproved):** - **The dependency FS cache works.** On a warm `bit status`, dep loading is **635 cache hits, 0 misses/recomputes**. `legacy-load-deps` is _not_ re-resolving dependencies. -- **`status`'s dominant cost (~7s wall) is dependency-object _materialization on cache hit_** — - `DependenciesData.deserialize` + reconstructing full `Dependency`/`DependencyList` objects + - `applyOverrides`, for every component, even though `status` reads little of it. This is the - "all-or-nothing, always fully materialize" problem of §1.1 — **structural, not a cache bug.** - Reducing it needs the staged/lazy-loading work (defer dependency-object construction), **not a - Phase-2 quick fix.** Earlier framing of this as "39s" was aggregate-concurrent self-time, not - wall; wall is ~13s. 
-- **`graph`'s dominant load cost is file-content reads (`consumer-fs-load`, 5.8s)** → this is what - **lazy file contents** (§2.1) targets; validated as a real win for `graph`/scope-side loads, but - it does **not** help `status` (whose file reads are negligible). -- Implication for Phase-2 ordering: lazy file contents helps `graph`; per-command partial loads help - `deps usage`/IDE/`remove`/forking; `loadDocs/loadCompositions: false` trims `status`'s slot work - (~1.7s). The big `status` number is deferred to the staged-loading phase. +- **`legacy-load-deps` is filesystem I/O, not object materialization.** Sub-step timing of the warm + cache-hit path (313 components, aggregate self-time): `statFiles` **22.3s** + `cacheRead` **10.9s**, + versus `deserialize` **9ms**, `applyOverrides` 0.4s, `updateVersions` 0.1s. The cost is the + deps-cache _read + invalidation_ layer paid per component — **not** `DependenciesData.deserialize` / + `Dependency` reconstruction (negligible). `statFiles` was a recursive `globby` per component that + **followed the component's `node_modules` symlink into the shared workspace `node_modules`** (226k + of 230k scanned entries), run 313× per command. +- **Aggregate self-time ≠ wall — sharply.** Batching that into one `node_modules`-ignoring workspace + scan cut `statFiles` 22.3s→0.15s aggregate and **fs syscalls 74.3k→44.8k per warm `bit status` + (~40%)** — yet a same-state wall A/B moved warm wall by **~0.3s**. The removed work is I/O-_wait_ + that overlaps with CPU on the single JS thread; on a warm SSD it was never on the critical path + (real win on cold/CI/networked filesystems, where it is). `cacheRead` (10.9s) is likewise I/O-wait, + so consolidating it would also be warm-wall-neutral. +- **The warm-wall bottleneck is CPU-bound, single-threaded JS** — the stages whose self-time ≈ their + wall contribution: **`on-load` slot handlers (9.2s)**, **`dependency-resolution` (7.2s)**, + **`workspace.get` (5s)**. 
These — not the deps-cache I/O — are what move warm `status` wall. +- **`graph`'s dominant load cost is file-content reads (`consumer-fs-load`, 5.8s)** → the target for + **lazy file contents** (§2.1); a real win for `graph`/scope-side loads, but it does **not** help + `status` (whose file reads are negligible). +- Implication for Phase-2 ordering: the deps-cache invalidation batch ships as a standalone fs/CPU + efficiency win (helps cold/CI, warm-wall-neutral). The next **warm-wall** target is `on-load` + slot-laziness (`loadDocs`/`loadCompositions: false` for non-UI flows) — the largest CPU-bound stage. + Earlier framing of the deps cost as "39s"/"materialization" was both an aggregate-vs-wall and a + cause misread; corrected here. --- @@ -340,3 +356,17 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency file-content reads (`consumer-fs-load`, 5.8s) → the target for lazy file contents. (Correction: an earlier "39s" figure was aggregate-concurrent self-time, not wall; warm wall is ~13s.) Direction for the next Phase-2 PR intentionally left open. +- 2026-06-23 — Deeper sub-step instrumentation **disproved the "object materialization" conclusion** + (see §4.1). The warm `legacy-load-deps` cost is filesystem I/O in the deps-cache invalidation: + `statFiles` 22.3s + `cacheRead` 10.9s aggregate, while `deserialize` is 9ms. `statFiles` was a + per-component recursive `globby` that followed each component's `node_modules` symlink into the + shared `node_modules` (226k/230k scanned entries), run 313× per command. Shipped the first Phase-2 + perf change: a command-scoped, `node_modules`-ignoring **batched mtime index** + (`buildDirsLastModifiedIndex` in `@teambit/toolbox.fs.last-modified`, memoized on `FsCache`, + invalidated via `workspace.clearAllComponentsCache`/`clearComponentCache`). 
Result: `statFiles` + 22.3s→0.15s aggregate, **warm `bit status` fs syscalls 74.3k→44.8k (~40%)**, `readFile` traffic + unchanged (no regression, checked against the bootstrap fs-read e2e metric). Key lesson reaffirmed: + aggregate self-time ≠ wall — a same-state A/B moved warm wall only ~0.3s because the cut work was + I/O-wait overlapping CPU on the single JS thread (real win on cold/CI/networked FS). The warm-wall + bottleneck is CPU-bound: `on-load` (9.2s), `dependency-resolution` (7.2s), `workspace.get` (5s) — + next warm-wall target is `on-load` slot-laziness. diff --git a/scopes/workspace/workspace/workspace.ts b/scopes/workspace/workspace/workspace.ts index ae928f81f139..2fd0e6985311 100644 --- a/scopes/workspace/workspace/workspace.ts +++ b/scopes/workspace/workspace/workspace.ts @@ -833,6 +833,7 @@ it's possible that the version ${component.id.version} belong to ${idStr.split(' this.consumer.componentLoader.clearComponentsCache(); this.componentStatusLoader.clearCache(); this.aggregatedLoadFailures.clear(); + this.consumer.componentFsCache.clearComponentsMtimeIndex(); this._componentList = new ComponentsList(this); } @@ -840,6 +841,8 @@ it's possible that the version ${component.id.version} belong to ${idStr.split(' this.componentLoader.clearComponentCache(id); this.componentStatusLoader.clearOneComponentCache(id); this.consumer.clearOneComponentCache(id); + const componentDir = this.consumer.bitMap.getComponentIfExist(id, { ignoreVersion: true })?.getComponentDir(); + if (componentDir) this.consumer.componentFsCache.deleteComponentMtimeIndexEntry(componentDir); this._componentList = new ComponentsList(this); } From db0e386c33d7435c48db3b55f7bc33dbba341a4c Mon Sep 17 00:00:00 2001 From: David First Date: Tue, 23 Jun 2026 15:12:17 -0400 Subject: [PATCH 2/5] docs: link PR #10445 in component-loading redesign status --- scopes/workspace/workspace/component-loading-redesign.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/scopes/workspace/workspace/component-loading-redesign.md b/scopes/workspace/workspace/component-loading-redesign.md index 614eb69a4a78..d4561f98fc2d 100644 --- a/scopes/workspace/workspace/component-loading-redesign.md +++ b/scopes/workspace/workspace/component-loading-redesign.md @@ -206,7 +206,7 @@ earlier ones teach us). - [x] Benchmark harness committed + baseline recorded (see §4) — **gate for the rest of the phase** - [x] Batch the deps-cache invalidation scan: one `node_modules`-ignoring workspace scan shared via a command-scoped mtime index, replacing the per-component recursive `globby`. Cuts warm `bit - status` fs syscalls ~40% (74.3k→44.8k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). + status` fs syscalls ~40% (74.3k→44.8k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). - [ ] `on-load` slot-laziness (`loadDocs: false, loadCompositions: false` for non-UI flows) — the largest CPU-bound stage (9.2s) and the next **warm-wall** target - [ ] Lazy file contents in `ModelComponent.toConsumerComponent` (helps `graph`, not `status`) @@ -326,7 +326,7 @@ hypothesis, which deeper sub-step instrumentation disproved):** | Phase | State | OpenSpec change | PRs | | ----------------------- | ----------- | ------------------------------ | --------------------------------------------------- | | 1 — Observability | done | `component-load-observability` | [#10418](https://github.com/teambit/bit/pull/10418) | -| 2 — Quick perf wins | in progress | — | — | +| 2 — Quick perf wins | in progress | — | [#10445](https://github.com/teambit/bit/pull/10445) | | 3 — Cache consolidation | not started | — | — | | 4 — Staged pipeline | not started | — | — | | 5 — Env planner | not started | — | — | From db242dc95a19844f572c8fe1aa19d9137a993908 Mon Sep 17 00:00:00 2001 From: David First Date: Tue, 23 Jun 2026 15:45:28 -0400 Subject: [PATCH 3/5] fix(workspace): harden components mtime index against build failure and watch races Address qodo review on the deps-cache 
invalidation index: - clear the in-flight build promise in `finally`, so a transient build rejection (glob/stat error) no longer poisons all later reads in a long-lived process (watch/start). - in `deleteComponentMtimeIndexEntry`, bump the generation when a build is in-flight, so a watch-triggered clear that races the first build discards that build's result instead of caching the now-stale entry as canonical. - fall back to the per-component scan if the centralized index build throws, preserving fault isolation (one bad dir no longer fails every load). --- .../dependencies-loader.ts | 23 ++++++++++++------- scopes/workspace/modules/fs-cache/fs-cache.ts | 21 ++++++++++++----- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts index 56b85203304e..4dd807f2123a 100644 --- a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts +++ b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts @@ -152,18 +152,25 @@ export class DependenciesLoader { * single-component cache clear (watch) or for a component added since the scan. */ private async getComponentLastModified(workspace: Workspace, rootDir: string): Promise<number> { - const index = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(() => - buildDirsLastModifiedIndex( - workspace.path, - workspace.consumer.bitMap.getAllComponents().map((componentMap) => componentMap.getComponentDir()) - ) - ); - const fromIndex = index.get(rootDir); + let index: Map<string, number> | undefined; + try { + index = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(() => + buildDirsLastModifiedIndex( + workspace.path, + workspace.consumer.bitMap.getAllComponents().map((componentMap) => componentMap.getComponentDir()) + ) + ); + } catch (err: any) { + // a centralized scan failure (e.g. 
a filesystem error on one dir) shouldn't fail every + // component's load — fall back to the per-component scan below, preserving fault isolation. + this.logger.debug(`dependencies-loader, failed building the components mtime index: ${err?.message || err}`); + } + const fromIndex = index?.get(rootDir); if (fromIndex !== undefined) return fromIndex; const filesPaths = this.component.files.map((file) => file.path); filesPaths.push(path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME)); const lastModified = await getLastModifiedComponentTimestampMs(rootDir, filesPaths); - index.set(rootDir, lastModified); + index?.set(rootDir, lastModified); return lastModified; } diff --git a/scopes/workspace/modules/fs-cache/fs-cache.ts b/scopes/workspace/modules/fs-cache/fs-cache.ts index b2585bbce96f..640a92268077 100644 --- a/scopes/workspace/modules/fs-cache/fs-cache.ts +++ b/scopes/workspace/modules/fs-cache/fs-cache.ts @@ -34,12 +34,18 @@ export class FsCache { if (this.componentsMtimeIndex) return this.componentsMtimeIndex; if (!this.componentsMtimeIndexBuilding) { const gen = this.componentsMtimeIndexGen; - this.componentsMtimeIndexBuilding = build().then((index) => { - // if the index was cleared while building, don't cache this now-stale result as canonical. - if (gen === this.componentsMtimeIndexGen) this.componentsMtimeIndex = index; - this.componentsMtimeIndexBuilding = undefined; - return index; - }); + const building = build() + .then((index) => { + // if the index was cleared/invalidated while building, don't cache this stale result as canonical. + if (gen === this.componentsMtimeIndexGen) this.componentsMtimeIndex = index; + return index; + }) + .finally(() => { + // clear on both success and failure so a transient build error doesn't poison future reads; + // guard against clobbering a newer build started after an invalidation. 
+ if (this.componentsMtimeIndexBuilding === building) this.componentsMtimeIndexBuilding = undefined; + }); + this.componentsMtimeIndexBuilding = building; } return this.componentsMtimeIndexBuilding; } @@ -54,6 +60,9 @@ export class FsCache { /** drop a single component's entry so its next load recomputes it (e.g. on a watch file change). */ deleteComponentMtimeIndexEntry(rootDir: string) { this.componentsMtimeIndex?.delete(rootDir); + // if a build is in flight it may already contain this now-stale entry; bump the generation so its + // result won't be cached as canonical, forcing the next read to rebuild (and re-stat this component). + if (this.componentsMtimeIndexBuilding) this.componentsMtimeIndexGen += 1; } async getDocsFromCache(filePath: string): Promise<{ timestamp: number; data: string } | null> { From 3b9a34fae1e13c650049de2bdf3bb222bbc789e3 Mon Sep 17 00:00:00 2001 From: David First Date: Wed, 24 Jun 2026 10:35:20 -0400 Subject: [PATCH 4/5] fix(workspace): scan only the node_modules footprint the deps cache needs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The first cut ignored node_modules entirely, which is a correctness regression: auto-detect resolves imports *through* node_modules and reads each direct dep's package.json (name/componentId), so the cached result depends on node_modules content — ignoring it drops the invalidation safety net. Restrict instead of ignore: scan component source (excluding the deep node_modules subtree) plus each component's `node_modules` and `node_modules/@scope` directory mtimes. A dep add/remove/version-relink/ componentId-change all go through a relink that bumps those dirs, and the dependency-tree builder stops at the package boundary, so the deep tree and transitive store are irrelevant to the cache. Warm `bit status` fs syscalls 74.3k -> 46.9k (~37%); correctness preserved (verified: source, node_modules, and @scope changes all invalidate). 
--- .../dependencies-loader.ts | 4 +- scopes/toolbox/fs/last-modified/index.ts | 2 +- .../toolbox/fs/last-modified/last-modified.ts | 87 ++++++++++++------- .../workspace/component-loading-redesign.md | 47 ++++++---- 4 files changed, 86 insertions(+), 54 deletions(-) diff --git a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts index 4dd807f2123a..7eb70f5480e8 100644 --- a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts +++ b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts @@ -4,7 +4,7 @@ import { IssuesClasses } from '@teambit/component-issues'; import { getLastModifiedComponentTimestampMs, getLastModifiedPathsTimestampMs, - buildDirsLastModifiedIndex, + buildComponentDirsLastModifiedIndex, } from '@teambit/toolbox.fs.last-modified'; import { ExtensionDataEntry } from '@teambit/legacy.extension-data'; import type { DependencyLoaderOpts, ConsumerComponent as Component } from '@teambit/legacy.consumer-component'; @@ -155,7 +155,7 @@ export class DependenciesLoader { let index: Map<string, number> | undefined; try { index = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(() => - buildDirsLastModifiedIndex( + buildComponentDirsLastModifiedIndex( workspace.path, workspace.consumer.bitMap.getAllComponents().map((componentMap) => componentMap.getComponentDir()) ) diff --git a/scopes/toolbox/fs/last-modified/index.ts b/scopes/toolbox/fs/last-modified/index.ts index d559a0875783..e94b172db739 100644 --- a/scopes/toolbox/fs/last-modified/index.ts +++ b/scopes/toolbox/fs/last-modified/index.ts @@ -2,5 +2,5 @@ export { getLastModifiedPathsTimestampMs, getPathStatIfExist, getLastModifiedComponentTimestampMs, - buildDirsLastModifiedIndex, + buildComponentDirsLastModifiedIndex, } from './last-modified'; diff --git a/scopes/toolbox/fs/last-modified/last-modified.ts 
b/scopes/toolbox/fs/last-modified/last-modified.ts index bdb805ca84e2..f101643ffe25 100644 --- a/scopes/toolbox/fs/last-modified/last-modified.ts +++ b/scopes/toolbox/fs/last-modified/last-modified.ts @@ -54,24 +54,23 @@ function ownerDir(relPath: string, dirSet: Set<string>): string | undefined { } /** - * build a last-modified index for many directories with a *single* filesystem scan, keyed by each - * input dir (relative to `cwd`). the value is the max mtime over every file and nested directory - * under that dir, plus the dir's own mtime. equivalent to calling `getLastModifiedComponentTimestampMs` - * per dir, but replaces N recursive `globby` scans with one — the hot path on large workspaces. + * build a last-modified index for many component dirs with a few batched filesystem scans, keyed by + * each input dir (relative to `cwd`). replaces the old per-component recursive `globby` (the hot path + * on large workspaces) while preserving the exact freshness signals the dependency cache depends on. * - * the per-dir value catches content edits (file mtime), additions/deletions in nested dirs (the - * nested dir's own mtime), and deletions directly under the dir (the dir's own mtime). + * what's scanned, and why (the dependency auto-detect — whose result is cached — never traverses + * *into* a node_modules package, so the package internals and the transitive store are irrelevant): + * - the component's own source files + dirs (catches import/source changes). the deep `node_modules` + * subtree is skipped here — a package's internals can't change the cached result. + * - each component's `node_modules` and `node_modules/@scope` directory mtimes — these catch a direct + * dependency added / removed / version-relinked, which is how the meaningful node_modules changes + * reach a component (install/link rewrite the symlink entry, bumping the containing dir). 
a direct + * dep's `package.json` `componentId`/`name` change (package <-> component) likewise goes through a + * relink, so it's covered by the same dir-mtime signal without scanning every manifest. * - * `node_modules` is ignored by default: component dirs symlink it to the shared workspace - * `node_modules`, so following it makes the scan ~60x larger and slower. Its contents are also - * irrelevant to source-derived caches (e.g. auto-detected dependencies come from source imports; - * install flows clear those caches explicitly). + * per-dir value = max mtime over all of the above, plus the dir's own mtime (deletion directly under it). */ -export async function buildDirsLastModifiedIndex( - cwd: string, - dirs: string[], - ignore: string[] = ['**/node_modules/**'] -): Promise> { +export async function buildComponentDirsLastModifiedIndex(cwd: string, dirs: string[]): Promise> { const uniqDirs = [...new Set(dirs.filter(Boolean))]; const dirSet = new Set(uniqDirs); const index = new Map(); @@ -79,25 +78,47 @@ export async function buildDirsLastModifiedIndex( const current = index.get(dir); if (current === undefined || mtimeMs > current) index.set(dir, mtimeMs); }; - // one recursive scan of all dirs, returning files + nested dirs together with their stats. - const entries = (await globby(uniqDirs, { - cwd, - stats: true, - onlyFiles: false, - dot: true, - ignore, - })) as unknown as GlobbyStatEntry[]; - for (const entry of entries) { - const owner = ownerDir(entry.path, dirSet); - if (owner) bump(owner, entry.stats?.mtimeMs ?? 0); - } - // globby returns the *contents* of each dir, not the dir itself; stat the dirs so a deletion - // directly under one (which only bumps that dir's own mtime) is still reflected. + const collect = (entries: GlobbyStatEntry[]) => { + for (const entry of entries) { + const owner = ownerDir(entry.path, dirSet); + if (owner) bump(owner, entry.stats?.mtimeMs ?? 0); + } + }; + + // 1. 
source: recurse the component dirs, skipping the deep node_modules subtree. + collect( + (await globby(uniqDirs, { + cwd, + stats: true, + onlyFiles: false, + dot: true, + ignore: ['**/node_modules/**'], + })) as unknown as GlobbyStatEntry[] + ); + + // 2. the node_modules structure the deps cache depends on: the `@scope` dir mtimes (catch a scoped + // dep added/removed/relinked within an existing scope). single-segment globs — they return the + // scope dirs themselves, never recursing into them (a bare `node_modules` glob *would* recurse + // the whole symlinked tree, so the `node_modules` dir mtime is taken via a direct stat below). + const scopeDirPatterns = uniqDirs.map((dir) => `${dir}/node_modules/@*`); + collect( + (await globby(scopeDirPatterns, { + cwd, + stats: true, + onlyFiles: false, + dot: true, + followSymbolicLinks: false, + })) as unknown as GlobbyStatEntry[] + ); + + // stat directly (not globbed): each rootDir — globby returns its *contents*, not the dir, so a + // deletion directly under it would otherwise be missed; and each `node_modules` dir — its mtime + // catches a top-level dep added/removed (a bare-dir glob would recurse the whole tree). await Promise.all( - uniqDirs.map(async (dir) => { - const stat = await getPathStatIfExist(nodePath.join(cwd, dir)); - if (stat) bump(dir, stat.mtimeMs); - }) + uniqDirs.flatMap((dir) => [ + getPathStatIfExist(nodePath.join(cwd, dir)).then((stat) => stat && bump(dir, stat.mtimeMs)), + getPathStatIfExist(nodePath.join(cwd, dir, 'node_modules')).then((stat) => stat && bump(dir, stat.mtimeMs)), + ]) ); return index; } diff --git a/scopes/workspace/workspace/component-loading-redesign.md b/scopes/workspace/workspace/component-loading-redesign.md index d4561f98fc2d..b2aebbd27c48 100644 --- a/scopes/workspace/workspace/component-loading-redesign.md +++ b/scopes/workspace/workspace/component-loading-redesign.md @@ -204,9 +204,11 @@ earlier ones teach us). 
### Phase 2 — Quick perf wins on existing seams - [x] Benchmark harness committed + baseline recorded (see §4) — **gate for the rest of the phase** -- [x] Batch the deps-cache invalidation scan: one `node_modules`-ignoring workspace scan shared via a - command-scoped mtime index, replacing the per-component recursive `globby`. Cuts warm `bit - status` fs syscalls ~40% (74.3k→44.8k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). +- [x] Batch the deps-cache invalidation scan: a command-scoped mtime index built from a few batched + `globby` scans, replacing the per-component recursive `globby`. Restricts the node_modules + traversal to the footprint the cache depends on (source + each component's `node_modules`/`@scope` + dir mtimes, no deep tree — auto-detect never traverses _into_ a package). Cuts warm `bit status` + fs syscalls ~37% (74.3k→46.9k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). - [ ] `on-load` slot-laziness (`loadDocs: false, loadCompositions: false` for non-UI flows) — the largest CPU-bound stage (9.2s) and the next **warm-wall** target - [ ] Lazy file contents in `ModelComponent.toConsumerComponent` (helps `graph`, not `status`) @@ -301,12 +303,16 @@ hypothesis, which deeper sub-step instrumentation disproved):** `Dependency` reconstruction (negligible). `statFiles` was a recursive `globby` per component that **followed the component's `node_modules` symlink into the shared workspace `node_modules`** (226k of 230k scanned entries), run 313× per command. -- **Aggregate self-time ≠ wall — sharply.** Batching that into one `node_modules`-ignoring workspace - scan cut `statFiles` 22.3s→0.15s aggregate and **fs syscalls 74.3k→44.8k per warm `bit status` - (~40%)** — yet a same-state wall A/B moved warm wall by **~0.3s**. The removed work is I/O-_wait_ - that overlaps with CPU on the single JS thread; on a warm SSD it was never on the critical path - (real win on cold/CI/networked filesystems, where it is). 
`cacheRead` (10.9s) is likewise I/O-wait, - so consolidating it would also be warm-wall-neutral. +- **Aggregate self-time ≠ wall — sharply.** Batching that scan and restricting the node*modules + traversal to the footprint the cache depends on (the dependency auto-detect never traverses *into* a + package, so only the component's `node_modules`/`@scope` dir mtimes matter — a dep add/remove/relink + bumps them; the deep tree and transitive store are irrelevant) cut `statFiles` 22.3s→~0.5s aggregate + and **fs syscalls 74.3k→46.9k per warm `bit status` (~37%)** — yet a same-state wall A/B moved warm + wall by **~0.3s**. The removed work is I/O-\_wait* that overlaps with CPU on the single JS thread; on + a warm SSD it was never on the critical path (real win on cold/CI/networked filesystems, where it + is). `cacheRead` (10.9s) is likewise I/O-wait, so consolidating it would also be warm-wall-neutral. + (An earlier iteration _ignored_ node_modules entirely — faster still, but a correctness regression: + the cache resolves imports through node_modules, so its structure must invalidate the cache.) - **The warm-wall bottleneck is CPU-bound, single-threaded JS** — the stages whose self-time ≈ their wall contribution: **`on-load` slot handlers (9.2s)**, **`dependency-resolution` (7.2s)**, **`workspace.get` (5s)**. These — not the deps-cache I/O — are what move warm `status` wall. @@ -361,12 +367,17 @@ hypothesis, which deeper sub-step instrumentation disproved):** `statFiles` 22.3s + `cacheRead` 10.9s aggregate, while `deserialize` is 9ms. `statFiles` was a per-component recursive `globby` that followed each component's `node_modules` symlink into the shared `node_modules` (226k/230k scanned entries), run 313× per command. 
Shipped the first Phase-2 - perf change: a command-scoped, `node_modules`-ignoring **batched mtime index** - (`buildDirsLastModifiedIndex` in `@teambit/toolbox.fs.last-modified`, memoized on `FsCache`, - invalidated via `workspace.clearAllComponentsCache`/`clearComponentCache`). Result: `statFiles` - 22.3s→0.15s aggregate, **warm `bit status` fs syscalls 74.3k→44.8k (~40%)**, `readFile` traffic - unchanged (no regression, checked against the bootstrap fs-read e2e metric). Key lesson reaffirmed: - aggregate self-time ≠ wall — a same-state A/B moved warm wall only ~0.3s because the cut work was - I/O-wait overlapping CPU on the single JS thread (real win on cold/CI/networked FS). The warm-wall - bottleneck is CPU-bound: `on-load` (9.2s), `dependency-resolution` (7.2s), `workspace.get` (5s) — - next warm-wall target is `on-load` slot-laziness. + perf change: a command-scoped **batched mtime index** (`buildComponentDirsLastModifiedIndex` in + `@teambit/toolbox.fs.last-modified`, memoized on `FsCache`, invalidated via + `workspace.clearAllComponentsCache`/`clearComponentCache`). Result: **warm `bit status` fs syscalls + 74.3k→46.9k (~37%)**, `readFile` traffic unchanged (no regression, checked against the bootstrap + fs-read e2e metric). **Important correctness lesson:** a first cut simply _ignored_ node_modules + (74.3k→44.8k) — but the deps auto-detect resolves imports _through_ node_modules and reads each + direct dep's `package.json` (`name`/`componentId`), so the cache depends on node_modules content; + ignoring it is a regression. The tree builder stops at the package boundary + (`generate-tree-madge.ts` filter), so only the component's `node_modules`/`@scope` dir mtimes are + needed (a dep add/remove/relink bumps them) — not the deep tree. See [[deps-cache-node-modules-invalidation]]. 
+ Key perf lesson reaffirmed: aggregate self-time ≠ wall — a same-state A/B moved warm wall only ~0.3s + because the cut work was I/O-wait overlapping CPU on the single JS thread (real win on cold/CI/ + networked FS). The warm-wall bottleneck is CPU-bound: `on-load` (9.2s), `dependency-resolution` + (7.2s), `workspace.get` (5s) — next warm-wall target is `on-load` slot-laziness. From 7e1857bea9b08ac3cf3d1cc01f8fcd7db1b0bbde Mon Sep 17 00:00:00 2001 From: David First Date: Wed, 24 Jun 2026 16:59:25 -0400 Subject: [PATCH 5/5] refactor(workspace): drop the shared mtime index, restrict the per-component scan instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared command-scoped index added memoization/invalidation complexity (and the two reliability bugs qodo flagged) for no syscall benefit — batching saves nothing here, and it over-scanned single-component commands (bit show would scan all components to check one). Reverted it (fs-cache, workspace, dependencies-loader back to their original state; deps fs-cache and per-component invalidation untouched). Keep only the actual win: the per-component freshness scan (getLastModifiedDirTimestampMs) now stops at the node_modules boundary and takes just the node_modules + node_modules/@scope dir mtimes — auto-detect never traverses into a package, so the deep tree/transitive store are irrelevant. Warm `bit status` fs syscalls 74.3k -> 46.4k (~37%); correctness verified (source / node_modules / @scope changes all invalidate). 
--- .../dependencies-loader.ts | 36 +----- scopes/toolbox/fs/last-modified/index.ts | 1 - .../toolbox/fs/last-modified/last-modified.ts | 112 ++++-------------- scopes/workspace/modules/fs-cache/fs-cache.ts | 46 ------- .../workspace/component-loading-redesign.md | 40 ++++--- scopes/workspace/workspace/workspace.ts | 3 - 6 files changed, 46 insertions(+), 192 deletions(-) diff --git a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts index 7eb70f5480e8..31a11ee5247d 100644 --- a/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts +++ b/scopes/dependencies/dependencies/dependencies-loader/dependencies-loader.ts @@ -4,7 +4,6 @@ import { IssuesClasses } from '@teambit/component-issues'; import { getLastModifiedComponentTimestampMs, getLastModifiedPathsTimestampMs, - buildComponentDirsLastModifiedIndex, } from '@teambit/toolbox.fs.last-modified'; import { ExtensionDataEntry } from '@teambit/legacy.extension-data'; import type { DependencyLoaderOpts, ConsumerComponent as Component } from '@teambit/legacy.consumer-component'; @@ -120,7 +119,10 @@ export class DependenciesLoader { // to invalidate the cache in such a case. return null; } - const lastModifiedComponent = await this.getComponentLastModified(workspace, rootDir); + const filesPaths = this.component.files.map((f) => f.path); + const componentConfigPath = path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME); + filesPaths.push(componentConfigPath); + const lastModifiedComponent = await getLastModifiedComponentTimestampMs(rootDir, filesPaths); const wasModifiedAfterCache = lastModifiedComponent > cacheData.timestamp; if (wasModifiedAfterCache) { @@ -144,36 +146,6 @@ export class DependenciesLoader { return DependenciesData.deserialize(cacheData.data); } - /** - * last-modified time of this component's files, used to decide whether the cached deps are stale. 
- * reads from a workspace-wide index built with a single filesystem scan and shared across all - * components in the command (instead of a recursive per-component scan — the hot path on large - * workspaces). falls back to a per-component scan when the entry isn't in the index, e.g. after a - * single-component cache clear (watch) or for a component added since the scan. - */ - private async getComponentLastModified(workspace: Workspace, rootDir: string): Promise { - let index: Map | undefined; - try { - index = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(() => - buildComponentDirsLastModifiedIndex( - workspace.path, - workspace.consumer.bitMap.getAllComponents().map((componentMap) => componentMap.getComponentDir()) - ) - ); - } catch (err: any) { - // a centralized scan failure (e.g. a filesystem error on one dir) shouldn't fail every - // component's load — fall back to the per-component scan below, preserving fault isolation. - this.logger.debug(`dependencies-loader, failed building the components mtime index: ${err?.message || err}`); - } - const fromIndex = index?.get(rootDir); - if (fromIndex !== undefined) return fromIndex; - const filesPaths = this.component.files.map((file) => file.path); - filesPaths.push(path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME)); - const lastModified = await getLastModifiedComponentTimestampMs(rootDir, filesPaths); - index?.set(rootDir, lastModified); - return lastModified; - } - private shouldSaveInCache(dependenciesData: DependenciesData, storeInFsCache = true) { if (!storeInFsCache) return false; if (!dependenciesData.issues) return true; diff --git a/scopes/toolbox/fs/last-modified/index.ts b/scopes/toolbox/fs/last-modified/index.ts index e94b172db739..5d9d3e9f80f6 100644 --- a/scopes/toolbox/fs/last-modified/index.ts +++ b/scopes/toolbox/fs/last-modified/index.ts @@ -2,5 +2,4 @@ export { getLastModifiedPathsTimestampMs, getPathStatIfExist, getLastModifiedComponentTimestampMs, - 
buildComponentDirsLastModifiedIndex, } from './last-modified'; diff --git a/scopes/toolbox/fs/last-modified/last-modified.ts b/scopes/toolbox/fs/last-modified/last-modified.ts index f101643ffe25..f88ea7afba67 100644 --- a/scopes/toolbox/fs/last-modified/last-modified.ts +++ b/scopes/toolbox/fs/last-modified/last-modified.ts @@ -1,22 +1,34 @@ -import nodePath from 'path'; import globby from 'globby'; import type { Stats } from 'fs-extra'; import fs from 'fs-extra'; import { compact } from 'lodash'; -type GlobbyStatEntry = { path: string; stats?: { mtimeMs: number } }; - /** - * check recursively all the sub-directories as well + * last-modified mtime of a component's directory structure (recursively), used as the dependency + * fs-cache freshness signal. + * + * the source tree is scanned recursively, but the scan stops at the `node_modules` boundary: the + * dependency auto-detect (whose cached result this guards) never traverses *into* a package, so the + * package internals and the transitive store are irrelevant — recursing `node_modules` followed the + * symlinked store and made this scan ~60x larger. Instead only the `node_modules` dir and its + * `@scope` dirs' mtimes are taken: a direct dep added / removed / version-relinked / componentId- + * changed all go through a relink that rewrites the symlink entry, bumping the containing dir. */ async function getLastModifiedDirTimestampMs(rootDir: string): Promise { - const allDirs = await globby(rootDir, { + // source subdirectories, excluding the deep node_modules subtree. + const sourceDirs = await globby(rootDir, { onlyDirectories: true, - // ignore: ['**/node_modules/**'], // need to think about it more. sometimes we do want to invalidate cache upon node_modules changes inside component dir - // stats: true // todo: consider retrieving the stats from here. 
+ ignore: ['**/node_modules/**'], }); - allDirs.push(rootDir); - return getLastModifiedPathsTimestampMs(allDirs); + sourceDirs.push(rootDir); + // node_modules/@scope dirs (catch a scoped dep changing within an already-existing scope). a *bare* + // `node_modules` glob would recurse the whole symlinked tree, so its own dir mtime is taken via the + // direct stat in getLastModifiedPathsTimestampMs below, not globbed. + const scopeDirs = await globby(`${rootDir}/node_modules/@*`, { + onlyDirectories: true, + followSymbolicLinks: false, + }); + return getLastModifiedPathsTimestampMs([...sourceDirs, ...scopeDirs, `${rootDir}/node_modules`]); } export async function getLastModifiedPathsTimestampMs(paths: string[]): Promise { @@ -40,85 +52,3 @@ export async function getPathStatIfExist(path: string): Promise { throw err; } } - -/** - * find the directory in `dirSet` that owns `relPath` — the deepest dir that is a path-prefix of it. - */ -function ownerDir(relPath: string, dirSet: Set): string | undefined { - const parts = relPath.split('/'); - for (let i = parts.length - 1; i > 0; i -= 1) { - const candidate = parts.slice(0, i).join('/'); - if (dirSet.has(candidate)) return candidate; - } - return undefined; -} - -/** - * build a last-modified index for many component dirs with a few batched filesystem scans, keyed by - * each input dir (relative to `cwd`). replaces the old per-component recursive `globby` (the hot path - * on large workspaces) while preserving the exact freshness signals the dependency cache depends on. - * - * what's scanned, and why (the dependency auto-detect — whose result is cached — never traverses - * *into* a node_modules package, so the package internals and the transitive store are irrelevant): - * - the component's own source files + dirs (catches import/source changes). the deep `node_modules` - * subtree is skipped here — a package's internals can't change the cached result. 
- * - each component's `node_modules` and `node_modules/@scope` directory mtimes — these catch a direct - * dependency added / removed / version-relinked, which is how the meaningful node_modules changes - * reach a component (install/link rewrite the symlink entry, bumping the containing dir). a direct - * dep's `package.json` `componentId`/`name` change (package <-> component) likewise goes through a - * relink, so it's covered by the same dir-mtime signal without scanning every manifest. - * - * per-dir value = max mtime over all of the above, plus the dir's own mtime (deletion directly under it). - */ -export async function buildComponentDirsLastModifiedIndex(cwd: string, dirs: string[]): Promise> { - const uniqDirs = [...new Set(dirs.filter(Boolean))]; - const dirSet = new Set(uniqDirs); - const index = new Map(); - const bump = (dir: string, mtimeMs: number) => { - const current = index.get(dir); - if (current === undefined || mtimeMs > current) index.set(dir, mtimeMs); - }; - const collect = (entries: GlobbyStatEntry[]) => { - for (const entry of entries) { - const owner = ownerDir(entry.path, dirSet); - if (owner) bump(owner, entry.stats?.mtimeMs ?? 0); - } - }; - - // 1. source: recurse the component dirs, skipping the deep node_modules subtree. - collect( - (await globby(uniqDirs, { - cwd, - stats: true, - onlyFiles: false, - dot: true, - ignore: ['**/node_modules/**'], - })) as unknown as GlobbyStatEntry[] - ); - - // 2. the node_modules structure the deps cache depends on: the `@scope` dir mtimes (catch a scoped - // dep added/removed/relinked within an existing scope). single-segment globs — they return the - // scope dirs themselves, never recursing into them (a bare `node_modules` glob *would* recurse - // the whole symlinked tree, so the `node_modules` dir mtime is taken via a direct stat below). 
- const scopeDirPatterns = uniqDirs.map((dir) => `${dir}/node_modules/@*`); - collect( - (await globby(scopeDirPatterns, { - cwd, - stats: true, - onlyFiles: false, - dot: true, - followSymbolicLinks: false, - })) as unknown as GlobbyStatEntry[] - ); - - // stat directly (not globbed): each rootDir — globby returns its *contents*, not the dir, so a - // deletion directly under it would otherwise be missed; and each `node_modules` dir — its mtime - // catches a top-level dep added/removed (a bare-dir glob would recurse the whole tree). - await Promise.all( - uniqDirs.flatMap((dir) => [ - getPathStatIfExist(nodePath.join(cwd, dir)).then((stat) => stat && bump(dir, stat.mtimeMs)), - getPathStatIfExist(nodePath.join(cwd, dir, 'node_modules')).then((stat) => stat && bump(dir, stat.mtimeMs)), - ]) - ); - return index; -} diff --git a/scopes/workspace/modules/fs-cache/fs-cache.ts b/scopes/workspace/modules/fs-cache/fs-cache.ts index 640a92268077..d36ca11a047c 100644 --- a/scopes/workspace/modules/fs-cache/fs-cache.ts +++ b/scopes/workspace/modules/fs-cache/fs-cache.ts @@ -14,57 +14,11 @@ const DEPS = 'deps'; export class FsCache { readonly basePath: PathOsBasedAbsolute; protected isNoFsCacheFeatureEnabled: boolean; - // command-scoped index of component rootDir -> last-modified mtimeMs, used to invalidate the - // dependencies fs-cache with a single workspace scan instead of a per-component one. invalidated - // by the workspace's clearAllComponentsCache / clearComponentCache (e.g. on watch file changes). 
- private componentsMtimeIndex?: Map; - private componentsMtimeIndexBuilding?: Promise>; - private componentsMtimeIndexGen = 0; constructor(private scopePath: string) { this.basePath = path.join(this.scopePath, WORKSPACE_CACHE, COMPONENTS_CACHE); this.isNoFsCacheFeatureEnabled = isFeatureEnabled(NO_FS_CACHE_FEATURE); } - /** - * return the shared components last-modified index, building it once via `build` and memoizing it - * for the lifetime of this cache (a command, or until invalidated). concurrent first-callers share - * a single build. - */ - async getOrBuildComponentsMtimeIndex(build: () => Promise>): Promise> { - if (this.componentsMtimeIndex) return this.componentsMtimeIndex; - if (!this.componentsMtimeIndexBuilding) { - const gen = this.componentsMtimeIndexGen; - const building = build() - .then((index) => { - // if the index was cleared/invalidated while building, don't cache this stale result as canonical. - if (gen === this.componentsMtimeIndexGen) this.componentsMtimeIndex = index; - return index; - }) - .finally(() => { - // clear on both success and failure so a transient build error doesn't poison future reads; - // guard against clobbering a newer build started after an invalidation. - if (this.componentsMtimeIndexBuilding === building) this.componentsMtimeIndexBuilding = undefined; - }); - this.componentsMtimeIndexBuilding = building; - } - return this.componentsMtimeIndexBuilding; - } - - /** drop the whole index (e.g. on a full workspace cache clear). */ - clearComponentsMtimeIndex() { - this.componentsMtimeIndex = undefined; - this.componentsMtimeIndexBuilding = undefined; - this.componentsMtimeIndexGen += 1; - } - - /** drop a single component's entry so its next load recomputes it (e.g. on a watch file change). 
*/ - deleteComponentMtimeIndexEntry(rootDir: string) { - this.componentsMtimeIndex?.delete(rootDir); - // if a build is in flight it may already contain this now-stale entry; bump the generation so its - // result won't be cached as canonical, forcing the next read to rebuild (and re-stat this component). - if (this.componentsMtimeIndexBuilding) this.componentsMtimeIndexGen += 1; - } - async getDocsFromCache(filePath: string): Promise<{ timestamp: number; data: string } | null> { return this.getStringDataFromCache(filePath, DOCS); } diff --git a/scopes/workspace/workspace/component-loading-redesign.md b/scopes/workspace/workspace/component-loading-redesign.md index b2aebbd27c48..08e032fff8fb 100644 --- a/scopes/workspace/workspace/component-loading-redesign.md +++ b/scopes/workspace/workspace/component-loading-redesign.md @@ -204,11 +204,12 @@ earlier ones teach us). ### Phase 2 — Quick perf wins on existing seams - [x] Benchmark harness committed + baseline recorded (see §4) — **gate for the rest of the phase** -- [x] Batch the deps-cache invalidation scan: a command-scoped mtime index built from a few batched - `globby` scans, replacing the per-component recursive `globby`. Restricts the node_modules - traversal to the footprint the cache depends on (source + each component's `node_modules`/`@scope` - dir mtimes, no deep tree — auto-detect never traverses _into_ a package). Cuts warm `bit status` - fs syscalls ~37% (74.3k→46.9k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1). +- [x] Restrict the deps-cache invalidation scan to the `node_modules` footprint the cache depends on: + the per-component freshness scan stops at the `node_modules` boundary (auto-detect never traverses + _into_ a package) and only takes each component's `node_modules`/`@scope` dir mtimes — a direct + dep added/removed/relinked bumps them. Cuts warm `bit status` fs syscalls ~37% (74.3k→46.4k); + warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1).
Kept per-component (no shared index — it + added complexity for zero syscall benefit and over-scanned single-component commands). - [ ] `on-load` slot-laziness (`loadDocs: false, loadCompositions: false` for non-UI flows) — the largest CPU-bound stage (9.2s) and the next **warm-wall** target - [ ] Lazy file contents in `ModelComponent.toConsumerComponent` (helps `graph`, not `status`) @@ -303,12 +304,11 @@ hypothesis, which deeper sub-step instrumentation disproved):** `Dependency` reconstruction (negligible). `statFiles` was a recursive `globby` per component that **followed the component's `node_modules` symlink into the shared workspace `node_modules`** (226k of 230k scanned entries), run 313× per command. -- **Aggregate self-time ≠ wall — sharply.** Batching that scan and restricting the node*modules - traversal to the footprint the cache depends on (the dependency auto-detect never traverses *into* a - package, so only the component's `node_modules`/`@scope` dir mtimes matter — a dep add/remove/relink - bumps them; the deep tree and transitive store are irrelevant) cut `statFiles` 22.3s→~0.5s aggregate - and **fs syscalls 74.3k→46.9k per warm `bit status` (~37%)** — yet a same-state wall A/B moved warm - wall by **~0.3s**. The removed work is I/O-\_wait* that overlaps with CPU on the single JS thread; on +- **Aggregate self-time ≠ wall — sharply.** Restricting the `node_modules` traversal to the footprint + the cache depends on (the dependency auto-detect never traverses _into_ a package, so only the + component's `node_modules`/`@scope` dir mtimes matter — a dep add/remove/relink bumps them; the deep + tree and transitive store are irrelevant) cut **fs syscalls 74.3k→46.4k per warm `bit status` + (~37%)** — yet a same-state wall A/B moved warm wall by **~0.3s**. The removed work is I/O-_wait_ that overlaps with CPU on the single JS thread; on a warm SSD it was never on the critical path (real win on cold/CI/networked filesystems, where it is).
`cacheRead` (10.9s) is likewise I/O-wait, so consolidating it would also be warm-wall-neutral. (An earlier iteration _ignored_ node_modules entirely — faster still, but a correctness regression: @@ -367,16 +367,18 @@ hypothesis, which deeper sub-step instrumentation disproved):** `statFiles` 22.3s + `cacheRead` 10.9s aggregate, while `deserialize` is 9ms. `statFiles` was a per-component recursive `globby` that followed each component's `node_modules` symlink into the shared `node_modules` (226k/230k scanned entries), run 313× per command. Shipped the first Phase-2 - perf change: a command-scoped **batched mtime index** (`buildComponentDirsLastModifiedIndex` in - `@teambit/toolbox.fs.last-modified`, memoized on `FsCache`, invalidated via - `workspace.clearAllComponentsCache`/`clearComponentCache`). Result: **warm `bit status` fs syscalls - 74.3k→46.9k (~37%)**, `readFile` traffic unchanged (no regression, checked against the bootstrap - fs-read e2e metric). **Important correctness lesson:** a first cut simply _ignored_ node_modules - (74.3k→44.8k) — but the deps auto-detect resolves imports _through_ node_modules and reads each - direct dep's `package.json` (`name`/`componentId`), so the cache depends on node_modules content; - ignoring it is a regression. The tree builder stops at the package boundary + perf change: restrict that per-component scan to the `node_modules` footprint the cache depends on — + it stops at the `node_modules` boundary and only takes the `node_modules`/`@scope` dir mtimes + (`getLastModifiedDirTimestampMs` in `@teambit/toolbox.fs.last-modified`). Result: **warm `bit status` + fs syscalls 74.3k→46.4k (~37%)**, `readFile` traffic unchanged (no regression, checked against the + bootstrap fs-read e2e metric).
**Important correctness lesson:** a first cut simply _ignored_ + `node_modules` (74.3k→44.8k) — but the deps auto-detect resolves imports _through_ `node_modules` and + reads each direct dep's `package.json` (`name`/`componentId`), so the cache depends on node_modules + content; ignoring it is a regression. The tree builder stops at the package boundary (`generate-tree-madge.ts` filter), so only the component's `node_modules`/`@scope` dir mtimes are needed (a dep add/remove/relink bumps them) — not the deep tree. See [[deps-cache-node-modules-invalidation]]. + (A shared command-scoped index was tried and dropped: it added memoization/invalidation complexity + for zero syscall benefit — batching saves no syscalls here — and over-scanned single-component commands.) Key perf lesson reaffirmed: aggregate self-time ≠ wall — a same-state A/B moved warm wall only ~0.3s because the cut work was I/O-wait overlapping CPU on the single JS thread (real win on cold/CI/ networked FS). The warm-wall bottleneck is CPU-bound: `on-load` (9.2s), `dependency-resolution` (7.2s), `workspace.get` (5s) — next warm-wall target is `on-load` slot-laziness. diff --git a/scopes/workspace/workspace/workspace.ts b/scopes/workspace/workspace/workspace.ts index 2fd0e6985311..ae928f81f139 100644 --- a/scopes/workspace/workspace/workspace.ts +++ b/scopes/workspace/workspace/workspace.ts @@ -833,7 +833,6 @@ it's possible that the version ${component.id.version} belong to ${idStr.split(' this.consumer.componentLoader.clearComponentsCache(); this.componentStatusLoader.clearCache(); this.aggregatedLoadFailures.clear(); - this.consumer.componentFsCache.clearComponentsMtimeIndex(); this._componentList = new ComponentsList(this); } @@ -841,8 +840,6 @@ it's possible that the version ${component.id.version} belong to ${idStr.split(' this.componentLoader.clearComponentCache(id); this.componentStatusLoader.clearOneComponentCache(id); this.consumer.clearOneComponentCache(id); - const componentDir = this.consumer.bitMap.getComponentIfExist(id, { ignoreVersion: true })?.getComponentDir(); - if
(componentDir) this.consumer.componentFsCache.deleteComponentMtimeIndexEntry(componentDir); this._componentList = new ComponentsList(this); }