Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions pds-fix-dangling-profile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env bash

# Run this script to fix a dangling profile/self record in the actor store.
# This needs to be run directly on the PDS instance.
#
# This script will:
# Remove busted profile index rows (record + record_blob) for app.bsky.actor.profile/self,
# then rebuild the actor repo MST from the record table.
#
# Usage:
# ./pds-fix-dangling-profile.sh did:plc:xxxxxxxxxxxxxx
# ACTOR_STORE_ROOT=/mnt/data/pds/actors ./pds-fix-dangling-profile.sh did:plc:...
# PDS_ENV_FILE=/home/bluesky/pds.env ./pds-fix-dangling-profile.sh did:plc:...
# DRY_RUN=1 ./pds-fix-dangling-profile.sh did:plc:...
#
# Env:
# ACTOR_STORE_ROOT (default: $PDS_ACTOR_STORE_DIRECTORY or $PDS_DATA_DIRECTORY/actors)
# REBUILD_SCRIPT (default: /ebsa/bluesky/current/service/run-rebuild.js)
# PDS_ENV_FILE sourced before rebuild if set
# YES=1 skip confirmation
# DRY_RUN=1 print only

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Load the PDS environment file. While allexport is on, every variable the
# file assigns is exported to child processes.
set -o allexport
source /home/bluesky/pds.env
set +o allexport

# Print all arguments to stderr, prefixed with "ERROR: ", then exit with
# status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# --- Argument handling -------------------------------------------------------
# -h/--help : print this file's leading comment block and exit 0.
# no args   : print a usage line to stderr and exit 1.
case "${1:-}" in
  -h | --help)
    # Print only the contiguous '#' lines that follow the shebang, so the help
    # text never spills into the code below the header, whatever its length.
    awk 'NR > 1 && /^#/ { print; in_header = 1; next } in_header { exit }' "$0"
    exit 0
    ;;
  "")
    echo "Usage: $0 <DID>" >&2
    exit 1
    ;;
esac

DID="$1"
[[ "$DID" == did:* ]] || die "DID must start with did:"

# The DID is later interpolated into SQL statements and into a filesystem
# path, so restrict it to DID syntax (did:<method>:<identifier>). This rejects
# quotes, slashes, whitespace and any other character that could alter the SQL
# or escape the actor directory.
did_re='^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$'
[[ "$DID" =~ $did_re ]] || die "invalid DID syntax: $DID"

# Print (without a trailing newline) the actor store root directory.
# The first non-empty value wins, in this order:
#   1. $ACTOR_STORE_ROOT
#   2. $PDS_ACTOR_STORE_DIRECTORY
#   3. $PDS_DATA_DIRECTORY/actors
# Calls die when none of the three is set.
resolve_actor_root() {
  local candidate
  for candidate in "${ACTOR_STORE_ROOT:-}" "${PDS_ACTOR_STORE_DIRECTORY:-}"; do
    if [[ -n "$candidate" ]]; then
      printf '%s' "$candidate"
      return
    fi
  done

  [[ -n "${PDS_DATA_DIRECTORY:-}" ]] \
    || die "Set ACTOR_STORE_ROOT, PDS_ACTOR_STORE_DIRECTORY, or PDS_DATA_DIRECTORY"
  printf '%s' "${PDS_DATA_DIRECTORY}/actors"
}

# --- Locate the actor's store.sqlite ------------------------------------------
ACTOR_ROOT="$(resolve_actor_root)"
if [[ ! -d "$ACTOR_ROOT" ]]; then
  die "actor store root not found: $ACTOR_ROOT"
fi

if ! command -v openssl >/dev/null 2>&1; then
  die "openssl required"
fi

# Layout: <root>/<first two hex chars of sha256(DID)>/<DID>/store.sqlite.
# awk takes the second whitespace-separated field of openssl's output line.
HASH="$(printf '%s' "$DID" | openssl dgst -sha256 | awk '{print $2}')"
SHARD="${HASH:0:2}"
DB="${ACTOR_ROOT}/${SHARD}/${DID}/store.sqlite"
if [[ ! -f "$DB" ]]; then
  die "store.sqlite not found: $DB"
fi

# --- Remaining settings, each overridable from the environment ----------------
PROFILE_URI="at://${DID}/app.bsky.actor.profile/self"
: "${REBUILD_SCRIPT:=/ebsa/bluesky/current/service/run-rebuild.js}"
: "${DRY_RUN:=0}"
: "${YES:=0}"

# DRY_RUN=1: show the database path, the two DELETE statements and the rebuild
# command that would be run, then exit successfully without changing anything.
if [[ "$DRY_RUN" == "1" ]]; then
  printf '%s\n' \
    "DRY_RUN DB=$DB" \
    "DELETE FROM record WHERE uri='$PROFILE_URI';" \
    "DELETE FROM record_blob WHERE recordUri='$PROFILE_URI';" \
    "node $REBUILD_SCRIPT $DID"
  exit 0
fi

# Show what is about to happen and, unless YES=1, require an explicit "y"/"Y".
printf 'Actor DB: %s\n' "$DB"
printf 'Profile URI: %s\n' "$PROFILE_URI"
printf 'Rebuild: node %s %s\n' "$REBUILD_SCRIPT" "$DID"
if [[ "$YES" != "1" ]]; then
  # A failed read (e.g. EOF on stdin) leaves ans unset, which counts as "no".
  read -r -p "Proceed? [y/N] " ans || true
  case "${ans:-}" in
    y | Y) ;;
    *) die "aborted" ;;
  esac
fi

# --- Pre-flight ---------------------------------------------------------------
# Verify everything needed by BOTH the delete and the rebuild before touching
# the database. Otherwise a missing rebuild script or node binary would be
# discovered only after the rows are gone, leaving the repo un-rebuilt.
command -v sqlite3 >/dev/null 2>&1 || die "sqlite3 not in PATH"
[[ -f "$REBUILD_SCRIPT" ]] || die "REBUILD_SCRIPT not found: $REBUILD_SCRIPT"
command -v node >/dev/null 2>&1 || die "node not in PATH"
if [[ -n "${PDS_ENV_FILE:-}" ]]; then
  [[ -f "$PDS_ENV_FILE" ]] || die "PDS_ENV_FILE not found: $PDS_ENV_FILE"
fi

# The recheck helper is looked up next to this script rather than in the
# current directory, so the script works from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
GET_PROFILE_SCRIPT="${SCRIPT_DIR}/pds-get-profile.sh"

# Double any single quote so the URI cannot terminate the SQL string literal.
SQ="'"
PROFILE_URI_SQL="${PROFILE_URI//$SQ/$SQ$SQ}"

# --- Delete the busted index rows ---------------------------------------------
# -bail stops sqlite3 at the first error, so COMMIT is never reached after a
# failed DELETE and the transaction is discarded instead of half-applied.
sqlite3 -bail "$DB" <<SQL
BEGIN IMMEDIATE;
DELETE FROM record WHERE uri = '${PROFILE_URI_SQL}';
DELETE FROM record_blob WHERE recordUri = '${PROFILE_URI_SQL}';
COMMIT;
SQL

echo "Deleted profile record + record_blob rows."

# --- Rebuild the repo ---------------------------------------------------------
if [[ -n "${PDS_ENV_FILE:-}" ]]; then
  # Export everything the env file assigns so the node process sees it.
  set -a
  # shellcheck disable=SC1090
  . "$PDS_ENV_FILE"
  set +a
fi

echo "Rebuilding the actor repo..."
node "$REBUILD_SCRIPT" "$DID"
echo "Done rebuilding the actor repo."
echo "--------------------------------"
echo "Rechecking the profile..."
# Run as a child process (not exec) so the closing separator below is printed.
if [[ -f "$GET_PROFILE_SCRIPT" ]]; then
  bash "$GET_PROFILE_SCRIPT" "$DID"
else
  echo "WARN: recheck skipped, helper not found: $GET_PROFILE_SCRIPT" >&2
fi
echo "--------------------------------"
42 changes: 42 additions & 0 deletions pds-fix-dangling-profiles-from-list.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Run pds-fix-dangling-profile.sh once per non-empty, non-comment line in dangling-profile-dids.txt.
#
# Usage:
# ./pds-fix-dangling-profiles-from-list.sh
# DANGLING_PROFILE_DIDS_FILE=/tmp/dids.txt ./pds-fix-dangling-profiles-from-list.sh
# FIX_DANGLING_PROFILE_SCRIPT=/path/to/pds-fix-dangling-profile.sh ./pds-fix-dangling-profiles-from-list.sh
#
# If pds-fix-dangling-profile.sh expects the DID on stdin instead of $1, set:
# FIX_READS_STDIN=1 ./pds-fix-dangling-profiles-from-list.sh

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Defaults are resolved relative to the directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIST="${DANGLING_PROFILE_DIDS_FILE:-$SCRIPT_DIR/dangling-profile-dids.txt}"
FIX="${FIX_DANGLING_PROFILE_SCRIPT:-$SCRIPT_DIR/pds-fix-dangling-profile.sh}"
FIX_READS_STDIN="${FIX_READS_STDIN:-0}"

# Both the DID list and the per-DID fix script must exist as regular files.
[[ -f "$LIST" ]] || {
  printf 'ERROR: DID list not found: %s\n' "$LIST" >&2
  exit 1
}

[[ -f "$FIX" ]] || {
  printf 'ERROR: fix script not found: %s\n' "$FIX" >&2
  exit 1
}

# Invoke the fix script ($FIX) for one DID.
# Arguments: $1 - the DID
# The DID is passed as the script's first argument, or written to its stdin
# (followed by a newline) when FIX_READS_STDIN is "1".
run_fix() {
  local target_did="$1"

  if [[ "$FIX_READS_STDIN" != "1" ]]; then
    bash "$FIX" "$target_did"
    return
  fi

  printf '%s\n' "$target_did" | bash "$FIX"
}

# Process the list one line at a time.
#
# The list is read on file descriptor 3, not stdin. The fix script prompts
# with `read` on stdin; if the loop fed the list through stdin, that prompt
# would swallow the next DID as its answer. With FD 3 the child keeps the
# caller's stdin, so interactive confirmation still works.
#
# The `|| [[ -n ... ]]` clause handles a final line with no trailing newline.
while IFS= read -r -u 3 did || [[ -n "${did:-}" ]]; do
  # Trim leading and trailing whitespace, including tabs and the carriage
  # return left by CRLF line endings.
  did="${did#"${did%%[![:space:]]*}"}"
  did="${did%"${did##*[![:space:]]}"}"

  # Skip blank lines and '#' comment lines.
  if [[ -z "$did" || "$did" == \#* ]]; then
    continue
  fi

  run_fix "$did"
done 3<"$LIST"
130 changes: 130 additions & 0 deletions pds-get-profile.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env bash
# Inspect app.bsky.actor.profile/self for one actor via store.sqlite.
#
# Runs two queries:
# 1) Healthy: record INNER JOIN repo_block (indexed row + leaf block present)
# 2) Dangling: record LEFT JOIN repo_block where block missing (busted index)
#
# Usage:
# ./pds-get-profile.sh <DID> [ACTOR_STORE_ROOT]
# PDS_DATA_DIRECTORY=/mnt/data/pds ./pds-get-profile.sh did:plc:...
# VERBOSE=1 ./pds-get-profile.sh did:plc:... /path/to/actors
#
# Env (optional):
# ACTOR_STORE_ROOT Same as second argument (overrides env chain if set)
# PDS_ACTOR_STORE_DIRECTORY
# PDS_DATA_DIRECTORY Used as ${PDS_DATA_DIRECTORY}/actors when set
# PDS_ENV_FILE If set, sourced: set -a; . "$PDS_ENV_FILE"; set +a
# VERBOSE=1 Include hex(rb.content) for healthy profile rows
#
# Actor path matches PDS ActorStore.getLocation:
# sha256(DID) hex -> first 2 chars / <DID> / store.sqlite

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Load the PDS environment file. While allexport is on, every variable the
# file assigns is exported to child processes.
set -o allexport
source /home/bluesky/pds.env
set +o allexport

# Print all arguments to stderr, prefixed with "ERROR: ", then exit with
# status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the help text to stdout and exit with status 0.
usage() {
  printf '%s\n' \
    'Usage: pds-get-profile.sh <DID> [ACTOR_STORE_ROOT]' \
    '' \
    'Print profile/self from the actor store.sqlite: healthy join vs dangling rows.'
  exit 0
}

# --- Argument handling -------------------------------------------------------
# -h/--help prints the help text; a missing DID is a usage error on stderr.
case "${1:-}" in
  -h | --help)
    usage
    ;;
  "")
    echo "Usage: pds-get-profile.sh <DID> [ACTOR_STORE_ROOT]" >&2
    exit 1
    ;;
esac

DID="$1"
if [[ "$DID" != did:* ]]; then
  die "DID must start with did:"
fi

# Optional second argument: explicit actor store root.
ACTOR_ROOT_ARG="${2:-}"

# Optionally load an extra env file, exporting everything it assigns.
if [[ -n "${PDS_ENV_FILE:-}" ]]; then
  if [[ ! -f "$PDS_ENV_FILE" ]]; then
    die "PDS_ENV_FILE not found: $PDS_ENV_FILE"
  fi
  set -o allexport
  # shellcheck disable=SC1090
  source "$PDS_ENV_FILE"
  set +o allexport
fi

# Print (without a trailing newline) the actor store root directory.
# The first non-empty value wins, in this order:
#   1. $ACTOR_ROOT_ARG (the script's second argument)
#   2. $ACTOR_STORE_ROOT
#   3. $PDS_ACTOR_STORE_DIRECTORY
#   4. $PDS_DATA_DIRECTORY/actors
#   5. ./actors
resolve_actor_root() {
  local candidate
  for candidate in \
    "${ACTOR_ROOT_ARG:-}" \
    "${ACTOR_STORE_ROOT:-}" \
    "${PDS_ACTOR_STORE_DIRECTORY:-}"; do
    if [[ -n "$candidate" ]]; then
      printf '%s' "$candidate"
      return
    fi
  done

  if [[ -z "${PDS_DATA_DIRECTORY:-}" ]]; then
    printf '%s' "./actors"
  else
    printf '%s' "${PDS_DATA_DIRECTORY}/actors"
  fi
}

# --- Locate the actor's store.sqlite ------------------------------------------
ACTOR_ROOT="$(resolve_actor_root)"
if [[ ! -d "$ACTOR_ROOT" ]]; then
  die "actor store directory not found: $ACTOR_ROOT"
fi

if ! command -v openssl >/dev/null 2>&1; then
  die "openssl is required"
fi
if ! command -v sqlite3 >/dev/null 2>&1; then
  die "sqlite3 not in PATH"
fi

# Layout: <root>/<first two hex chars of sha256(DID)>/<DID>/store.sqlite.
# awk takes the second whitespace-separated field of openssl's output line.
HASH="$(printf '%s' "$DID" | openssl dgst -sha256 | awk '{print $2}')"
SHARD="${HASH:0:2}"
DB="${ACTOR_ROOT}/${SHARD}/${DID}/store.sqlite"

if [[ ! -f "$DB" ]]; then
  die "store.sqlite not found: $DB"
fi

: "${VERBOSE:=0}"

# Banner identifying which actor and database the report below refers to.
printf 'DID: %s\n' "$DID"
printf 'DB: %s\n' "$DB"
printf '\n'

# --- Healthy rows: the profile record's cid has a matching repo_block row.
# Both variants report uri, cid, indexedAt and the block length in bytes;
# VERBOSE=1 additionally dumps the block content as hex.
printf '%s\n' "=== Profile with repo_block (healthy) ==="
case "$VERBOSE" in
  1)
    sqlite3 -header -column "$DB" <<'SQL'
SELECT
r.uri,
r.cid,
r.indexedAt,
LENGTH(rb.content) AS content_bytes,
hex(rb.content) AS content_hex
FROM record AS r
INNER JOIN repo_block AS rb ON rb.cid = r.cid
WHERE r.collection = 'app.bsky.actor.profile'
AND r.rkey = 'self';
SQL
    ;;
  *)
    sqlite3 -header -column "$DB" <<'SQL'
SELECT
r.uri,
r.cid,
r.indexedAt,
LENGTH(rb.content) AS content_bytes
FROM record AS r
INNER JOIN repo_block AS rb ON rb.cid = r.cid
WHERE r.collection = 'app.bsky.actor.profile'
AND r.rkey = 'self';
SQL
    ;;
esac

# --- Dangling rows: a profile record exists but no repo_block row has its cid.
# The LEFT JOIN keeps every record row; "rb.cid IS NULL" selects the unmatched.
printf '\n'
printf '%s\n' "=== Profile index row without repo_block (dangling / busted) ==="
sqlite3 -header -column "$DB" <<'SQL'
SELECT r.uri, r.cid, r.indexedAt
FROM record AS r
LEFT JOIN repo_block AS rb ON rb.cid = r.cid
WHERE r.collection = 'app.bsky.actor.profile'
AND r.rkey = 'self'
AND rb.cid IS NULL;
SQL
105 changes: 105 additions & 0 deletions pds-scan-dangling-records.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env bash
# Scan all actor store.sqlite files for dangling record rows (record.cid missing in repo_block).
# Optionally filter to busted app.bsky.actor.profile/self only.
# DIDs with a dangling profile/self record are written to $DANGLING_PROFILE_DIDS_FILE
# (default: ./dangling-profile-dids.txt), which is truncated at the start of each run.
#
# Usage:
# ./pds-scan-dangling-records.sh [ACTOR_STORE_ROOT]
# ONLY_PROFILE=1 ./pds-scan-dangling-records.sh /mnt/data/pds/actors
# VERBOSE=1 ONLY_PROFILE=1 ./pds-scan-dangling-records.sh
# DANGLING_PROFILE_DIDS_FILE=/tmp/busted-dids.txt ./pds-scan-dangling-records.sh
#
# Resolution order for ACTOR_STORE_ROOT:
# $1 > $PDS_ACTOR_STORE_DIRECTORY > $PDS_DATA_DIRECTORY/actors > ./actors

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Load the PDS environment file. While allexport is on, every variable the
# file assigns is exported to child processes.
set -o allexport
source /home/bluesky/pds.env
set +o allexport

# Print (without a trailing newline) the actor store root directory.
# Arguments: $1 - optional explicit root; used when non-empty
# Otherwise falls back, in order, to $PDS_ACTOR_STORE_DIRECTORY,
# $PDS_DATA_DIRECTORY/actors, and finally ./actors.
resolve_actor_root() {
  local candidate
  for candidate in "${1:-}" "${PDS_ACTOR_STORE_DIRECTORY:-}"; do
    if [[ -n "$candidate" ]]; then
      printf '%s' "$candidate"
      return
    fi
  done

  if [[ -z "${PDS_DATA_DIRECTORY:-}" ]]; then
    printf '%s' "./actors"
  else
    printf '%s' "${PDS_DATA_DIRECTORY}/actors"
  fi
}

# The first positional argument, when given, overrides the environment.
ACTOR_ROOT="$(resolve_actor_root "${1:-}")"

[[ -d "$ACTOR_ROOT" ]] || {
  printf 'ERROR: actor store directory not found: %s\n' "$ACTOR_ROOT" >&2
  exit 1
}

command -v sqlite3 >/dev/null 2>&1 || {
  printf '%s\n' "ERROR: sqlite3 not in PATH" >&2
  exit 1
}

# Output file for DIDs with a dangling profile/self record; emptied (or
# created) here so each run starts from a clean list.
: "${DANGLING_PROFILE_DIDS_FILE:=./dangling-profile-dids.txt}"
: >"$DANGLING_PROFILE_DIDS_FILE"

# SQL used by the scan loop.
#
# String literals are written with single quotes, as standard SQL requires.
# SQLite treats a double-quoted token as an identifier and only falls back to
# a string literal as a legacy quirk, which recent sqlite3 shells disable by
# default. With double quotes these queries fail there with "no such column".

# total dangling: record row whose cid has no repo_block row
SQL_TOTAL='SELECT COUNT(*) FROM record AS r LEFT JOIN repo_block AS rb ON rb.cid = r.cid WHERE rb.cid IS NULL;'

# dangling specifically for profile/self
SQL_PROFILE="SELECT COUNT(*) FROM record AS r LEFT JOIN repo_block AS rb ON rb.cid = r.cid WHERE rb.cid IS NULL AND r.uri LIKE '%/app.bsky.actor.profile/self';"

# sample of dangling URIs (for debugging), printed as "<uri> | <cid>"
SQL_LIST="SELECT r.uri || ' | ' || r.cid FROM record AS r LEFT JOIN repo_block AS rb ON rb.cid = r.cid WHERE rb.cid IS NULL LIMIT 50;"

# ONLY_PROFILE=1 : report only stores whose profile/self record is dangling.
# VERBOSE=1      : also print a sample of dangling "<uri> | <cid>" rows.
ONLY_PROFILE="${ONLY_PROFILE:-0}"
VERBOSE="${VERBOSE:-0}"

# Counters for the summary: reported stores, and those with a busted profile.
actors_any=0
actors_profile=0

# Visit every store.sqlite under ACTOR_ROOT. Paths are NUL-delimited and the
# loop is fed by process substitution so the counters survive the loop.
while IFS= read -r -d '' db; do
  tot="$(sqlite3 "$db" "$SQL_TOTAL" 2>/dev/null)" || {
    echo "WARN: sqlite3 failed (skip): $db" >&2
    continue
  }

  # Nothing dangling in this store.
  if [[ -z "${tot:-}" ]] || [[ "$tot" -eq 0 ]]; then
    continue
  fi

  # A failing profile query is still counted as 0, but is now reported on
  # stderr. Swallowing it silently makes a broken query look like "no busted
  # profiles anywhere".
  prof="$(sqlite3 "$db" "$SQL_PROFILE" 2>/dev/null)" || {
    echo "WARN: profile/self query failed (counting as 0): $db" >&2
    prof=0
  }
  if [[ "${ONLY_PROFILE}" == "1" ]] && [[ "${prof:-0}" -eq 0 ]]; then
    continue
  fi

  rel="${db#"$ACTOR_ROOT"/}"
  echo "DANGLING store=$rel"
  echo " total_dangling_rows=$tot profile_self_dangling=$prof"

  if [[ "${prof:-0}" -gt 0 ]]; then
    # PDS layout: ACTOR_ROOT/<shard>/<did>/store.sqlite
    did="$(basename "$(dirname "$rel")")"
    printf '%s\n' "$did" >>"$DANGLING_PROFILE_DIDS_FILE"
  fi

  if [[ "$VERBOSE" == "1" ]]; then
    # Best-effort debugging aid: a failure here must not abort the scan.
    sqlite3 "$db" "$SQL_LIST" 2>/dev/null | sed 's/^/ /' || true
    echo
  fi

  actors_any=$((actors_any + 1))
  if [[ "${prof:-0}" -gt 0 ]]; then
    actors_profile=$((actors_profile + 1))
  fi
done < <(find "$ACTOR_ROOT" -name store.sqlite -type f -print0)

# Final summary: counts gathered by the scan loop, plus the inputs and outputs
# of this run.
printf '%s\n' \
  "---" \
  "summary: actor_dbs_with_dangling=${actors_any}" \
  "summary: actor_dbs_with_busted_profile_self=${actors_profile}" \
  "scanned_root=${ACTOR_ROOT}" \
  "dangling_profile_dids_file=${DANGLING_PROFILE_DIDS_FILE}"
Loading