Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { IssuesClasses } from '@teambit/component-issues';
import {
getLastModifiedComponentTimestampMs,
getLastModifiedPathsTimestampMs,
buildDirsLastModifiedIndex,
} from '@teambit/toolbox.fs.last-modified';
import { ExtensionDataEntry } from '@teambit/legacy.extension-data';
import type { DependencyLoaderOpts, ConsumerComponent as Component } from '@teambit/legacy.consumer-component';
Expand Down Expand Up @@ -119,10 +120,7 @@ export class DependenciesLoader {
// to invalidate the cache in such a case.
return null;
}
const filesPaths = this.component.files.map((f) => f.path);
const componentConfigPath = path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME);
filesPaths.push(componentConfigPath);
const lastModifiedComponent = await getLastModifiedComponentTimestampMs(rootDir, filesPaths);
const lastModifiedComponent = await this.getComponentLastModified(workspace, rootDir);
const wasModifiedAfterCache = lastModifiedComponent > cacheData.timestamp;

if (wasModifiedAfterCache) {
Expand All @@ -146,6 +144,29 @@ export class DependenciesLoader {
return DependenciesData.deserialize(cacheData.data);
}

/**
 * resolve the last-modified timestamp (ms) used to decide whether this component's cached
 * dependencies are stale.
 *
 * the value normally comes from a workspace-wide index that is built once, by scanning the dirs of
 * all components in the bitmap, and then shared through the consumer's `componentFsCache`. when
 * `rootDir` has no entry in that index (per the original note: e.g. after a single-component cache
 * clear in watch mode, or for a component added since the scan), the component is scanned on its
 * own and the result is written back into the index so the next lookup is served from it.
 *
 * @param workspace the workspace whose bitmap and fs-cache are used.
 * @param rootDir the component root dir; it is the key looked up in the index.
 * @returns the last-modified time in milliseconds.
 */
private async getComponentLastModified(workspace: Workspace, rootDir: string): Promise<number> {
  // only invoked by the fs-cache when no index is memoized yet.
  const scanAllComponentDirs = () => {
    const componentDirs = workspace.consumer.bitMap
      .getAllComponents()
      .map((componentMap) => componentMap.getComponentDir());
    return buildDirsLastModifiedIndex(workspace.path, componentDirs);
  };
  const mtimeIndex = await workspace.consumer.componentFsCache.getOrBuildComponentsMtimeIndex(scanAllComponentDirs);

  const indexed = mtimeIndex.get(rootDir);
  if (indexed !== undefined) {
    return indexed;
  }

  // not indexed: fall back to scanning just this component (its files plus its config file).
  const componentConfigPath = path.join(workspace.path, rootDir, COMPONENT_CONFIG_FILE_NAME);
  const pathsToCheck = [...this.component.files.map((file) => file.path), componentConfigPath];
  const scannedMtime = await getLastModifiedComponentTimestampMs(rootDir, pathsToCheck);
  mtimeIndex.set(rootDir, scannedMtime);
  return scannedMtime;
}

private shouldSaveInCache(dependenciesData: DependenciesData, storeInFsCache = true) {
if (!storeInFsCache) return false;
if (!dependenciesData.issues) return true;
Expand Down
1 change: 1 addition & 0 deletions scopes/toolbox/fs/last-modified/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ export {
getLastModifiedPathsTimestampMs,
getPathStatIfExist,
getLastModifiedComponentTimestampMs,
buildDirsLastModifiedIndex,
} from './last-modified';
64 changes: 64 additions & 0 deletions scopes/toolbox/fs/last-modified/last-modified.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import nodePath from 'path';
import globby from 'globby';
import type { Stats } from 'fs-extra';
import fs from 'fs-extra';
import { compact } from 'lodash';

// the subset of a globby result entry that this file reads when scanning with `stats: true`:
// the entry's path and, when present, its modification time in milliseconds.
type GlobbyStatEntry = { path: string; stats?: { mtimeMs: number } };

/**
* check recursively all the sub-directories as well
*/
Expand Down Expand Up @@ -37,3 +40,64 @@ export async function getPathStatIfExist(path: string): Promise<Stats | null> {
throw err;
}
}

/**
 * resolve which directory in `dirSet` a scanned path belongs to.
 *
 * walks the ancestors of `relPath` ('/'-separated) from its nearest parent outwards and returns the
 * first one found in `dirSet`, so the deepest matching dir wins. `relPath` itself is never a
 * candidate, only its ancestors are.
 *
 * @returns the owning dir, or `undefined` when none of the ancestors is in `dirSet`.
 */
function ownerDir(relPath: string, dirSet: Set<string>): string | undefined {
  let slashAt = relPath.lastIndexOf('/');
  while (slashAt >= 0) {
    const ancestor = relPath.slice(0, slashAt);
    if (dirSet.has(ancestor)) return ancestor;
    // a slash at index 0 has nothing before it; stop rather than search from a negative index
    // (which `lastIndexOf` would clamp back to 0 and loop forever).
    slashAt = slashAt === 0 ? -1 : relPath.lastIndexOf('/', slashAt - 1);
  }
  return undefined;
}

/**
 * compute a last-modified index for many directories using a *single* recursive filesystem scan.
 *
 * the result is keyed by the input dirs (relative to `cwd`). each value is the highest mtime (ms)
 * among the files and nested directories the scan attributes to that dir, combined with the dir's
 * own mtime. it is meant as a drop-in for calling `getLastModifiedComponentTimestampMs` once per
 * dir, trading N recursive `globby` scans for one — the hot path on large workspaces.
 *
 * what each signal catches:
 * - a file's mtime: content edits.
 * - a nested directory's mtime: additions/deletions inside that nested directory.
 * - the dir's own mtime: additions/deletions directly under the dir.
 *
 * a scanned entry is attributed to one input dir only — the deepest input dir that is an ancestor
 * of it (see `ownerDir`). a dir that doesn't exist and has no scanned entries gets no key at all.
 *
 * @param cwd base directory the `dirs` are relative to.
 * @param dirs directories to index; falsy entries and duplicates are dropped.
 * @param ignore glob patterns excluded from the scan. `node_modules` is excluded by default:
 *   component dirs symlink it to the shared workspace `node_modules`, so following it makes the scan
 *   ~60x larger and slower, and its contents are irrelevant to source-derived caches (e.g.
 *   auto-detected dependencies come from source imports; install flows clear those caches
 *   explicitly).
 * @returns a map of dir -> last-modified time in milliseconds.
 */
export async function buildDirsLastModifiedIndex(
  cwd: string,
  dirs: string[],
  ignore: string[] = ['**/node_modules/**']
): Promise<Map<string, number>> {
  const uniqDirs = Array.from(new Set(dirs.filter(Boolean)));
  const knownDirs = new Set(uniqDirs);
  const latestByDir = new Map<string, number>();

  // keep only the newest mtime seen for a dir.
  function recordMtime(dir: string, mtimeMs: number): void {
    const previous = latestByDir.get(dir);
    if (previous === undefined || mtimeMs > previous) {
      latestByDir.set(dir, mtimeMs);
    }
  }

  // a single recursive pass over all dirs; files and nested dirs come back with their stats.
  const scanned = (await globby(uniqDirs, {
    cwd,
    stats: true,
    onlyFiles: false,
    dot: true,
    ignore,
  })) as unknown as GlobbyStatEntry[];
  scanned.forEach((entry) => {
    const owner = ownerDir(entry.path, knownDirs);
    if (!owner) return;
    recordMtime(owner, entry.stats?.mtimeMs ?? 0);
  });

  // the scan yields what is *inside* each dir, never the dir itself. stat every dir separately so a
  // deletion directly under it (which touches only the dir's own mtime) still shows up.
  const statDir = async (dir: string): Promise<void> => {
    const dirStat = await getPathStatIfExist(nodePath.join(cwd, dir));
    if (dirStat) recordMtime(dir, dirStat.mtimeMs);
  };
  await Promise.all(uniqDirs.map((dir) => statDir(dir)));

  return latestByDir;
}
37 changes: 37 additions & 0 deletions scopes/workspace/modules/fs-cache/fs-cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,48 @@ const DEPS = 'deps';
export class FsCache {
readonly basePath: PathOsBasedAbsolute;
protected isNoFsCacheFeatureEnabled: boolean;
// command-scoped index of component rootDir -> last-modified mtimeMs, used to invalidate the
// dependencies fs-cache with a single workspace scan instead of a per-component one. invalidated
// by the workspace's clearAllComponentsCache / clearComponentCache (e.g. on watch file changes).
private componentsMtimeIndex?: Map<string, number>;
private componentsMtimeIndexBuilding?: Promise<Map<string, number>>;
private componentsMtimeIndexGen = 0;
constructor(private scopePath: string) {
this.basePath = path.join(this.scopePath, WORKSPACE_CACHE, COMPONENTS_CACHE);
this.isNoFsCacheFeatureEnabled = isFeatureEnabled(NO_FS_CACHE_FEATURE);
}

/**
 * return the shared components last-modified index, building it once via `build` and memoizing it
 * for the lifetime of this cache (a command, or until invalidated). concurrent first-callers share
 * a single build.
 *
 * - if the index is cleared while a build is running, that build's result is still returned to the
 *   callers already waiting on it, but it is not memoized as the canonical index.
 * - a finished build only resets the in-flight marker when the marker is still its own, so a stale
 *   build can't wipe out the marker of a newer build that started after a clear.
 * - if `build` rejects, the in-flight marker is released so the next call retries instead of
 *   receiving the same rejected promise forever.
 *
 * @param build produces a fresh index; called only when nothing is memoized and no build is running.
 * @returns the memoized index, or the result of the (possibly shared) build.
 * @throws whatever `build` rejects with, for every caller sharing that build.
 */
async getOrBuildComponentsMtimeIndex(build: () => Promise<Map<string, number>>): Promise<Map<string, number>> {
  if (this.componentsMtimeIndex) return this.componentsMtimeIndex;
  if (this.componentsMtimeIndexBuilding) return this.componentsMtimeIndexBuilding;

  const gen = this.componentsMtimeIndexGen;
  // the handlers below run asynchronously, after `building` has been assigned.
  const releaseIfOwned = () => {
    if (this.componentsMtimeIndexBuilding === building) this.componentsMtimeIndexBuilding = undefined;
  };
  const building: Promise<Map<string, number>> = build().then(
    (index) => {
      // if the index was cleared while building, don't cache this now-stale result as canonical.
      if (gen === this.componentsMtimeIndexGen) this.componentsMtimeIndex = index;
      releaseIfOwned();
      return index;
    },
    (err: unknown) => {
      // don't leave a rejected promise behind as the shared in-flight build.
      releaseIfOwned();
      throw err;
    }
  );
  this.componentsMtimeIndexBuilding = building;
  return building;
}
Comment thread
davidfirst marked this conversation as resolved.
Outdated

/**
 * discard the entire last-modified index (e.g. on a full workspace cache clear).
 * besides forgetting the memoized index and any in-flight build, this advances the generation
 * counter, which is how a build that is still running detects that its result must not be memoized.
 */
clearComponentsMtimeIndex() {
  this.componentsMtimeIndexGen += 1;
  this.componentsMtimeIndexBuilding = undefined;
  this.componentsMtimeIndex = undefined;
}

/**
 * forget the indexed last-modified time of one component, so that its next load has to recompute
 * it (e.g. on a watch file change). does nothing when no index is currently memoized.
 *
 * @param rootDir the component root dir, i.e. the key of the entry to remove.
 */
deleteComponentMtimeIndexEntry(rootDir: string) {
  const memoizedIndex = this.componentsMtimeIndex;
  if (memoizedIndex) {
    memoizedIndex.delete(rootDir);
  }
}

async getDocsFromCache(filePath: string): Promise<{ timestamp: number; data: string } | null> {
return this.getStringDataFromCache(filePath, DOCS);
}
Expand Down
80 changes: 55 additions & 25 deletions scopes/workspace/workspace/component-loading-redesign.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Component Loading Redesign

**Status:** Phase 1 shipped; Phase 2 in progress
**Last updated:** 2026-06-15 (code references are against `master` @ `59855b104`; line numbers will drift)
**Last updated:** 2026-06-23 (code references are against `master`; line numbers will drift)

This document is the source of truth for a multi-phase effort to simplify Bit's component-loading
mechanism: fewer caches, a staged (lazy) loading pipeline, a single env/aspect load planner, and a
Expand Down Expand Up @@ -204,11 +204,15 @@ earlier ones teach us).
### Phase 2 — Quick perf wins on existing seams

- [x] Benchmark harness committed + baseline recorded (see §4) — **gate for the rest of the phase**
- [ ] Lazy file contents in `ModelComponent.toConsumerComponent`
- [x] Batch the deps-cache invalidation scan: one `node_modules`-ignoring workspace scan shared via a
command-scoped mtime index, replacing the per-component recursive `globby`. Cuts warm `bit
status` fs syscalls ~40% (74.3k→44.8k); warm-wall-neutral (I/O-wait), helps cold/CI (see §4.1).
- [ ] `on-load` slot-laziness (`loadDocs: false, loadCompositions: false` for non-UI flows) — the
largest CPU-bound stage (9.2s) and the next **warm-wall** target
- [ ] Lazy file contents in `ModelComponent.toConsumerComponent` (helps `graph`, not `status`)
- [ ] `bit deps usage`: ids + stored deps instead of full load
- [ ] IDE metadata endpoint (`api-for-ide.ts`): S0-S2-level data only
- [ ] `bit remove` / forking: drop full-component loads where only ids/paths are used
- [ ] Default `loadDocs: false, loadCompositions: false` for non-UI flows

### Phase 3 — Cache consolidation

Expand Down Expand Up @@ -270,13 +274,13 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency

**`bit status` (warm, ~13s wall):**

| stage | aggregate self-time | ~wall | note |
| ----------------------------------------------------------- | ------------------- | ----- | --------------------------------------------- |
| `legacy-load-deps` | 43s | ~7s | dependency-object materialization (see below) |
| `on-load` (slot handlers: docs, compositions, schema, pkg…) | 10s | ~1.7s | trimmable for non-UI flows |
| `dependency-resolution` (Harmony resolver) | 7.7s | ~1.3s | |
| `execute-load-slot` (own) | 5.8s | ~1s | |
| `consumer-fs-load` (file content reads) | negligible | — | not a `status` cost |
| stage | aggregate self-time | ~wall | note |
| ----------------------------------------------------------- | ------------------- | ----- | ----------------------------------------------------------- |
| `legacy-load-deps` | 43s | ~7s | filesystem traversal in deps-cache invalidation (see below) |
| `on-load` (slot handlers: docs, compositions, schema, pkg…) | 10s | ~1.7s | trimmable for non-UI flows |
| `dependency-resolution` (Harmony resolver) | 7.7s | ~1.3s | |
| `execute-load-slot` (own) | 5.8s | ~1s | |
| `consumer-fs-load` (file content reads) | negligible | — | not a `status` cost |

**`bit graph --json` (warm, ~20s wall; ~7.5s of it is loading):**

Expand All @@ -285,23 +289,35 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency
| `consumer-fs-load` (file content reads) | 5.8s | dominant load cost for graph |
| `legacy-load-deps` | 1.2s | graph uses a lighter load path |

**Key conclusions (validated, not hypothesized):**
**Key conclusions (validated by measurement; these _supersede_ the earlier "object materialization"
hypothesis, which deeper sub-step instrumentation disproved):**

- **The dependency FS cache works.** On a warm `bit status`, dep loading is **635 cache hits, 0
misses/recomputes**. `legacy-load-deps` is _not_ re-resolving dependencies.
- **`status`'s dominant cost (~7s wall) is dependency-object _materialization on cache hit_** —
`DependenciesData.deserialize` + reconstructing full `Dependency`/`DependencyList` objects +
`applyOverrides`, for every component, even though `status` reads little of it. This is the
"all-or-nothing, always fully materialize" problem of §1.1 — **structural, not a cache bug.**
Reducing it needs the staged/lazy-loading work (defer dependency-object construction), **not a
Phase-2 quick fix.** Earlier framing of this as "39s" was aggregate-concurrent self-time, not
wall; wall is ~13s.
- **`graph`'s dominant load cost is file-content reads (`consumer-fs-load`, 5.8s)** → this is what
**lazy file contents** (§2.1) targets; validated as a real win for `graph`/scope-side loads, but
it does **not** help `status` (whose file reads are negligible).
- Implication for Phase-2 ordering: lazy file contents helps `graph`; per-command partial loads help
`deps usage`/IDE/`remove`/forking; `loadDocs/loadCompositions: false` trims `status`'s slot work
(~1.7s). The big `status` number is deferred to the staged-loading phase.
- **`legacy-load-deps` is filesystem I/O, not object materialization.** Sub-step timing of the warm
cache-hit path (313 components, aggregate self-time): `statFiles` **22.3s** + `cacheRead` **10.9s**,
versus `deserialize` **9ms**, `applyOverrides` 0.4s, `updateVersions` 0.1s. The cost is the
deps-cache _read + invalidation_ layer paid per component — **not** `DependenciesData.deserialize` /
`Dependency` reconstruction (negligible). `statFiles` was a recursive `globby` per component that
**followed the component's `node_modules` symlink into the shared workspace `node_modules`** (226k
of 230k scanned entries), run 313× per command.
- **Aggregate self-time ≠ wall — sharply.** Batching that per-component scan into one `node_modules`-ignoring workspace
scan cut `statFiles` 22.3s→0.15s aggregate and **fs syscalls 74.3k→44.8k per warm `bit status`
(~40%)** — yet a same-state wall A/B moved warm wall by **~0.3s**. The removed work is I/O-_wait_
that overlaps with CPU on the single JS thread; on a warm SSD it was never on the critical path
(real win on cold/CI/networked filesystems, where it is). `cacheRead` (10.9s) is likewise I/O-wait,
so consolidating it would also be warm-wall-neutral.
- **The warm-wall bottleneck is CPU-bound, single-threaded JS** — the stages whose self-time ≈ their
wall contribution: **`on-load` slot handlers (9.2s)**, **`dependency-resolution` (7.2s)**,
**`workspace.get` (5s)**. These — not the deps-cache I/O — are what move warm `status` wall.
- **`graph`'s dominant load cost is file-content reads (`consumer-fs-load`, 5.8s)** → the target for
**lazy file contents** (§2.1); a real win for `graph`/scope-side loads, but it does **not** help
`status` (whose file reads are negligible).
- Implication for Phase-2 ordering: the deps-cache invalidation batch ships as a standalone fs/CPU
efficiency win (helps cold/CI, warm-wall-neutral). The next **warm-wall** target is `on-load`
slot-laziness (`loadDocs`/`loadCompositions: false` for non-UI flows) — the largest CPU-bound stage.
Earlier framing of the deps cost as "39s"/"materialization" was both an aggregate-vs-wall and a
cause misread; corrected here.

---

Expand All @@ -310,7 +326,7 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency
| Phase | State | OpenSpec change | PRs |
| ----------------------- | ----------- | ------------------------------ | --------------------------------------------------- |
| 1 — Observability | done | `component-load-observability` | [#10418](https://github.com/teambit/bit/pull/10418) |
| 2 — Quick perf wins | in progress | — | |
| 2 — Quick perf wins | in progress | — | [#10445](https://github.com/teambit/bit/pull/10445) |
| 3 — Cache consolidation | not started | — | — |
| 4 — Staged pipeline | not started | — | — |
| 5 — Env planner | not started | — | — |
Expand Down Expand Up @@ -340,3 +356,17 @@ Numbers below are aggregate self-time across ~313 components at ~6× concurrency
file-content reads (`consumer-fs-load`, 5.8s) → the target for lazy file contents. (Correction: an
earlier "39s" figure was aggregate-concurrent self-time, not wall; warm wall is ~13s.) Direction
for the next Phase-2 PR intentionally left open.
- 2026-06-23 — Deeper sub-step instrumentation **disproved the "object materialization" conclusion**
(see §4.1). The warm `legacy-load-deps` cost is filesystem I/O in the deps-cache invalidation:
`statFiles` 22.3s + `cacheRead` 10.9s aggregate, while `deserialize` is 9ms. `statFiles` was a
per-component recursive `globby` that followed each component's `node_modules` symlink into the
shared `node_modules` (226k/230k scanned entries), run 313× per command. Shipped the first Phase-2
perf change: a command-scoped, `node_modules`-ignoring **batched mtime index**
(`buildDirsLastModifiedIndex` in `@teambit/toolbox.fs.last-modified`, memoized on `FsCache`,
invalidated via `workspace.clearAllComponentsCache`/`clearComponentCache`). Result: `statFiles`
22.3s→0.15s aggregate, **warm `bit status` fs syscalls 74.3k→44.8k (~40%)**, `readFile` traffic
unchanged (no regression, checked against the bootstrap fs-read e2e metric). Key lesson reaffirmed:
aggregate self-time ≠ wall — a same-state A/B moved warm wall only ~0.3s because the cut work was
I/O-wait overlapping CPU on the single JS thread (real win on cold/CI/networked FS). The warm-wall
bottleneck is CPU-bound: `on-load` (9.2s), `dependency-resolution` (7.2s), `workspace.get` (5s) —
next warm-wall target is `on-load` slot-laziness.
3 changes: 3 additions & 0 deletions scopes/workspace/workspace/workspace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -833,13 +833,16 @@ it's possible that the version ${component.id.version} belong to ${idStr.split('
this.consumer.componentLoader.clearComponentsCache();
this.componentStatusLoader.clearCache();
this.aggregatedLoadFailures.clear();
this.consumer.componentFsCache.clearComponentsMtimeIndex();
this._componentList = new ComponentsList(this);
}

/**
 * clear the cached state held for a single component: the component loader, the status loader and
 * the consumer caches, plus the component's entry in the fs-cache last-modified index (only when
 * the bitmap still has the component and it has a component dir). the components list is recreated
 * afterwards.
 */
clearComponentCache(id: ComponentID) {
  this.componentLoader.clearComponentCache(id);
  this.componentStatusLoader.clearOneComponentCache(id);
  this.consumer.clearOneComponentCache(id);

  const componentMap = this.consumer.bitMap.getComponentIfExist(id, { ignoreVersion: true });
  const rootDir = componentMap?.getComponentDir();
  if (rootDir) {
    this.consumer.componentFsCache.deleteComponentMtimeIndexEntry(rootDir);
  }

  this._componentList = new ComponentsList(this);
}

Expand Down