From 38eb61252b7628918fcce4f274f6084c04b926dc Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 10:53:16 +0530 Subject: [PATCH 1/7] Fix: Initialize db conn to None to prevent UnboundLocalError (#1028) --- backend/app/database/faces.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index 0e43f7117..7bbe96e02 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -74,7 +74,7 @@ def db_insert_face_embeddings( cursor = conn.cursor() try: - embeddings_json = json.dumps([emb.tolist() for emb in embeddings]) + embeddings_json = json.dumps(embeddings.tolist()) # Convert bbox to JSON string if provided bbox_json = json.dumps(bbox) if bbox is not None else None @@ -141,7 +141,7 @@ def db_insert_face_embeddings_by_image_id( ) -def get_all_face_embeddings(): +def get_all_face_embeddings() -> List[Dict]: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() @@ -182,8 +182,8 @@ def get_all_face_embeddings(): ) in results: if image_id not in images_dict: try: - embeddings_json = json.loads(embeddings) - bbox_json = json.loads(bbox) + embeddings_json = json.loads(embeddings) if embeddings else None + bbox_json = json.loads(bbox) if bbox else None except json.JSONDecodeError: continue images_dict[image_id] = { @@ -307,6 +307,7 @@ def db_update_face_cluster_ids_batch( if not face_cluster_mapping: return + conn = None own_connection = cursor is None if own_connection: conn = sqlite3.connect(DATABASE_PATH) @@ -329,19 +330,19 @@ def db_update_face_cluster_ids_batch( update_data, ) - if own_connection: + if own_connection and conn: conn.commit() except Exception: - if own_connection: + if own_connection and conn: conn.rollback() print("Error updating face cluster IDs in batch.") raise finally: - if own_connection: + if own_connection and conn: conn.close() -def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[str, FaceEmbedding]]]: +def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding]]]: """ Get cluster IDs and their corresponding mean face embeddings. @@ -391,4 +392,4 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[str, FaceEmbedding] return cluster_means finally: - conn.close() + conn.close() \ No newline at end of file From cf14f8b2e20360581a28d71cf846c967880bc1cc Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 11:18:59 +0530 Subject: [PATCH 2/7] fix(#1027): return all faces in get_all_face_embeddings --- backend/app/database/faces.py | 109 +++++++++++++++++++++++----------- frontend/package-lock.json | 7 ++- frontend/package.json | 1 + 3 files changed, 79 insertions(+), 38 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index 7bbe96e02..fad8c236f 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -1,7 +1,7 @@ import sqlite3 import json import numpy as np -from typing import Optional, List, Dict, Union, TypedDict +from typing import Optional, List, Dict, Union, TypedDict, Any from app.config.settings import DATABASE_PATH # Type definitions @@ -58,7 +58,7 @@ def db_insert_face_embeddings( confidence: Optional[float] = None, bbox: Optional[BoundingBox] = None, cluster_id: Optional[ClusterId] = None, -) -> FaceId: +) -> Optional[FaceId]: """ Insert face embeddings with additional metadata. @@ -69,6 +69,9 @@ def db_insert_face_embeddings( confidence: Confidence score for face detection (optional) bbox: Bounding box coordinates as dict with keys: x, y, width, height (optional) cluster_id: ID of the face cluster this face belongs to (optional) + + Returns: + FaceId if successful, None if insert failed """ conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() @@ -100,7 +103,7 @@ def db_insert_face_embeddings_by_image_id( confidence: Optional[Union[float, List[float]]] = None, bbox: Optional[Union[BoundingBox, List[BoundingBox]]] = None, cluster_id: Optional[Union[ClusterId, List[ClusterId]]] = None, -) -> Union[FaceId, List[FaceId]]: +) -> Union[Optional[FaceId], List[Optional[FaceId]]]: """ Insert face embeddings using image path (convenience function). @@ -110,6 +113,9 @@ def db_insert_face_embeddings_by_image_id( confidence: Confidence score(s) for face detection (optional) bbox: Bounding box coordinates or list of bounding boxes (optional) cluster_id: Cluster ID(s) for the face(s) (optional) + + Returns: + FaceId or list of FaceIds. Can be None if insert failed. """ # Handle multiple faces in one image @@ -118,30 +124,58 @@ def db_insert_face_embeddings_by_image_id( and len(embeddings) > 0 and isinstance(embeddings[0], np.ndarray) ): - face_ids = [] + face_ids: List[Optional[FaceId]] = [] for i, emb in enumerate(embeddings): - conf = ( - confidence[i] - if isinstance(confidence, list) and i < len(confidence) - else confidence - ) - bb = bbox[i] if isinstance(bbox, list) and i < len(bbox) else bbox - cid = ( - cluster_id[i] - if isinstance(cluster_id, list) and i < len(cluster_id) - else cluster_id - ) + # Extract single confidence value + conf: Optional[float] = None + if isinstance(confidence, list) and i < len(confidence): + conf = confidence[i] + elif isinstance(confidence, (int, float)): + conf = float(confidence) + + # Extract single bbox value + bb: Optional[BoundingBox] = None + if isinstance(bbox, list) and i < len(bbox): + bb = bbox[i] + elif isinstance(bbox, dict): + bb = bbox + + # Extract single cluster_id value + cid: Optional[ClusterId] = None + if isinstance(cluster_id, list) and i < len(cluster_id): + cid = cluster_id[i] + elif isinstance(cluster_id, int): + cid = cluster_id + face_id = db_insert_face_embeddings(image_id, emb, conf, bb, cid) face_ids.append(face_id) return face_ids else: - # Single face + # Single face - extract single values from potential lists + single_conf: Optional[float] = None + if isinstance(confidence, list) and len(confidence) > 0: + single_conf = confidence[0] + elif isinstance(confidence, (int, float)): + single_conf = float(confidence) + + single_bbox: Optional[BoundingBox] = None + if isinstance(bbox, list) and len(bbox) > 0: + single_bbox = bbox[0] + elif isinstance(bbox, dict): + single_bbox = bbox + + single_cid: Optional[ClusterId] = None + if isinstance(cluster_id, list) and len(cluster_id) > 0: + single_cid = cluster_id[0] + elif isinstance(cluster_id, int): + single_cid = cluster_id + return db_insert_face_embeddings( - image_id, embeddings, confidence, bbox, cluster_id + image_id, embeddings, single_conf, single_bbox, single_cid ) -def get_all_face_embeddings() -> List[Dict]: +def get_all_face_embeddings() -> List[Dict[str, Any]]: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() @@ -151,11 +185,11 @@ def get_all_face_embeddings() -> List[Dict]: SELECT f.embeddings, f.bbox, - i.id, - i.path, - i.folder_id, - i.thumbnailPath, - i.metadata, + i.id, + i.path, + i.folder_id, + i.thumbnailPath, + i.metadata, i.isTagged, m.name as tag_name FROM faces f @@ -168,7 +202,7 @@ def get_all_face_embeddings() -> List[Dict]: from app.utils.images import image_util_parse_metadata - images_dict = {} + images_dict: Dict[str, Dict[str, Any]] = {} for ( embeddings, bbox, @@ -203,7 +237,7 @@ def get_all_face_embeddings() -> List[Dict]: images_dict[image_id]["tags"].append(tag_name) # Convert to list and set tags to None if empty - images = [] + images: List[Dict[str, Any]] = [] for image_data in images_dict.values(): if not image_data["tags"]: image_data["tags"] = None @@ -231,7 +265,7 @@ def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbed rows = cursor.fetchall() - faces = [] + faces: List[Dict[str, Union[FaceId, FaceEmbedding]]] = [] for row in rows: face_id, embeddings_json = row # Convert JSON string back to numpy array @@ -267,7 +301,7 @@ def db_get_all_faces_with_cluster_names() -> ( rows = cursor.fetchall() - faces = [] + faces: List[Dict[str, Union[FaceId, FaceEmbedding, Optional[str]]]] = [] for row in rows: face_id, embeddings_json, cluster_name = row # Convert JSON string back to numpy array @@ -286,7 +320,7 @@ def db_get_all_faces_with_cluster_names() -> ( def db_update_face_cluster_ids_batch( - face_cluster_mapping: List[Dict[str, Union[FaceId, ClusterId]]], + face_cluster_mapping: List[Dict[str, Union[FaceId, ClusterId, None]]], cursor: Optional[sqlite3.Cursor] = None, ) -> None: """ @@ -299,8 +333,8 @@ def db_update_face_cluster_ids_batch( Example: face_cluster_mapping = [ - {'face_id': 1, 'cluster_id': 'uuid-cluster-1'}, - {'face_id': 2, 'cluster_id': 'uuid-cluster-2'}, + {'face_id': 1, 'cluster_id': 1}, + {'face_id': 2, 'cluster_id': 2}, {'face_id': 3, 'cluster_id': None} # To unassign cluster ] """ @@ -309,13 +343,18 @@ def db_update_face_cluster_ids_batch( conn = None own_connection = cursor is None + if own_connection: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() + # At this point cursor should never be None + if cursor is None: + raise ValueError("Database cursor is required") + try: # Prepare update data as tuples (cluster_id, face_id) - update_data = [] + update_data: List[tuple] = [] for mapping in face_cluster_mapping: face_id = mapping.get("face_id") cluster_id = mapping.get("cluster_id") @@ -323,8 +362,8 @@ def db_update_face_cluster_ids_batch( cursor.executemany( """ - UPDATE faces - SET cluster_id = ? + UPDATE faces + SET cluster_id = ? WHERE face_id = ? """, update_data, @@ -369,7 +408,7 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] return [] # Group embeddings by cluster_id - cluster_embeddings = {} + cluster_embeddings: Dict[int, List[np.ndarray]] = {} for row in rows: cluster_id, embeddings_json = row # Convert JSON string back to numpy array @@ -380,7 +419,7 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] cluster_embeddings[cluster_id].append(embeddings) # Calculate mean embeddings for each cluster - cluster_means = [] + cluster_means: List[Dict[str, Union[int, FaceEmbedding]]] = [] for cluster_id, embeddings_list in cluster_embeddings.items(): # Stack all embeddings for this cluster and calculate mean stacked_embeddings = np.stack(embeddings_list) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index ab218ecaf..8924b012a 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -73,6 +73,7 @@ "@vitejs/plugin-react": "^4.2.1", "autoprefixer": "^10.4.20", "babel-jest": "^29.7.0", + "baseline-browser-mapping": "^2.9.14", "eslint": "^8.57.1", "eslint-config-prettier": "^9.1.0", "eslint-config-react-app": "^7.0.1", @@ -6588,9 +6589,9 @@ "license": "MIT" }, "node_modules/baseline-browser-mapping": { - "version": "2.8.20", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.20.tgz", - "integrity": "sha512-JMWsdF+O8Orq3EMukbUN1QfbLK9mX2CkUmQBcW2T0s8OmdAUL5LLM/6wFwSrqXzlXB13yhyK9gTKS1rIizOduQ==", + "version": "2.9.14", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.14.tgz", + "integrity": "sha512-B0xUquLkiGLgHhpPBqvl7GWegWBUNuujQ6kXd/r1U38ElPT6Ok8KZ8e+FpUGEc2ZoRQUzq/aUnaKFc/svWUGSg==", "dev": true, "license": "Apache-2.0", "bin": { diff --git a/frontend/package.json b/frontend/package.json index 0a53f1b8d..695b7e1d6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -88,6 +88,7 @@ "@vitejs/plugin-react": "^4.2.1", "autoprefixer": "^10.4.20", "babel-jest": "^29.7.0", + "baseline-browser-mapping": "^2.9.14", "eslint": "^8.57.1", "eslint-config-prettier": "^9.1.0", "eslint-config-react-app": "^7.0.1", From ebbcdecb75fe05f5ecc1c31b37deb32870c696aa Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 12:28:17 +0530 Subject: [PATCH 3/7] chore: revert accidental frontend dependency changes --- backend/app/database/faces.py | 180 ++++++++++++++++++++++------------ frontend/package-lock.json | 7 +- frontend/package.json | 1 - 3 files changed, 120 insertions(+), 68 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index fad8c236f..66f0abaac 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -27,6 +27,7 @@ class FaceData(TypedDict): def db_create_faces_table() -> None: + """Create the faces table if it doesn't exist.""" conn = None try: conn = sqlite3.connect(DATABASE_PATH) @@ -47,6 +48,9 @@ def db_create_faces_table() -> None: """ ) conn.commit() + except sqlite3.Error as e: + print(f"Error creating faces table: {e}") + raise finally: if conn is not None: conn.close() @@ -62,7 +66,6 @@ def db_insert_face_embeddings( """ Insert face embeddings with additional metadata. - Args: image_id: ID of the image this face belongs to embeddings: Face embedding vector (numpy array) @@ -73,10 +76,11 @@ def db_insert_face_embeddings( Returns: FaceId if successful, None if insert failed """ - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - + conn = None try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + embeddings_json = json.dumps(embeddings.tolist()) # Convert bbox to JSON string if provided @@ -86,15 +90,21 @@ def db_insert_face_embeddings( """ INSERT INTO faces (image_id, cluster_id, embeddings, confidence, bbox) VALUES (?, ?, ?, ?, ?) - """, + """, (image_id, cluster_id, embeddings_json, confidence, bbox_json), ) face_id = cursor.lastrowid conn.commit() return face_id + except sqlite3.Error as e: + print(f"Error inserting face embeddings: {e}") + if conn: + conn.rollback() + return None finally: - conn.close() + if conn is not None: + conn.close() def db_insert_face_embeddings_by_image_id( @@ -176,13 +186,27 @@ def db_insert_face_embeddings_by_image_id( def get_all_face_embeddings() -> List[Dict[str, Any]]: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() + """ + Get all face embeddings with associated image data. + + Returns: + List of dictionaries, one per face, containing face embeddings, bbox, + face_id, and associated image information including tags. + Note: + This function returns one entry per face. Images with multiple faces + will have multiple entries with the same image_id but different face data. + """ + conn = None try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + # Get all faces with their image data cursor.execute( """ SELECT + f.face_id, f.embeddings, f.bbox, i.id, @@ -190,20 +214,40 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: i.folder_id, i.thumbnailPath, i.metadata, - i.isTagged, - m.name as tag_name + i.isTagged + FROM faces f + JOIN images i ON f.image_id = i.id + ORDER BY i.path, f.face_id + """ + ) + face_results = cursor.fetchall() + + # Get tags for all images that have faces + cursor.execute( + """ + SELECT DISTINCT i.id, m.name as tag_name FROM faces f - JOIN images i ON f.image_id=i.id + JOIN images i ON f.image_id = i.id LEFT JOIN image_classes ic ON i.id = ic.image_id LEFT JOIN mappings m ON ic.class_id = m.class_id - """ + WHERE m.name IS NOT NULL + """ ) - results = cursor.fetchall() + tag_results = cursor.fetchall() from app.utils.images import image_util_parse_metadata - images_dict: Dict[str, Dict[str, Any]] = {} + # Build a mapping of image_id to tags + image_tags: Dict[str, List[str]] = {} + for image_id, tag_name in tag_results: + if image_id not in image_tags: + image_tags[image_id] = [] + if tag_name not in image_tags[image_id]: + image_tags[image_id].append(tag_name) + + faces: List[Dict[str, Any]] = [] for ( + face_id, embeddings, bbox, image_id, @@ -212,42 +256,37 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: thumbnail_path, metadata, is_tagged, - tag_name, - ) in results: - if image_id not in images_dict: - try: - embeddings_json = json.loads(embeddings) if embeddings else None - bbox_json = json.loads(bbox) if bbox else None - except json.JSONDecodeError: - continue - images_dict[image_id] = { - "embeddings": embeddings_json, - "bbox": bbox_json, - "id": image_id, - "path": path, - "folder_id": folder_id, - "thumbnailPath": thumbnail_path, - "metadata": image_util_parse_metadata(metadata), - "isTagged": bool(is_tagged), - "tags": [], - } - - # Add tag if it exists - if tag_name: - images_dict[image_id]["tags"].append(tag_name) + ) in face_results: + try: + embeddings_json = json.loads(embeddings) if embeddings else None + bbox_json = json.loads(bbox) if bbox else None + except json.JSONDecodeError: + continue + + tags = image_tags.get(image_id) + if tags is not None and len(tags) == 0: + tags = None + + faces.append({ + "face_id": face_id, + "embeddings": embeddings_json, + "bbox": bbox_json, + "id": image_id, + "path": path, + "folder_id": folder_id, + "thumbnailPath": thumbnail_path, + "metadata": image_util_parse_metadata(metadata), + "isTagged": bool(is_tagged), + "tags": tags, + }) - # Convert to list and set tags to None if empty - images: List[Dict[str, Any]] = [] - for image_data in images_dict.values(): - if not image_data["tags"]: - image_data["tags"] = None - images.append(image_data) - - # Sort by path - images.sort(key=lambda x: x["path"]) - return images + return faces + except sqlite3.Error as e: + print(f"Error getting face embeddings: {e}") + return [] finally: - conn.close() + if conn is not None: + conn.close() def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbedding]]]: @@ -257,10 +296,11 @@ def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbed Returns: List of dictionaries containing face_id and embeddings (as numpy array) """ - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - + conn = None try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + cursor.execute("SELECT face_id, embeddings FROM faces WHERE cluster_id IS NULL") rows = cursor.fetchall() @@ -273,8 +313,12 @@ def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbed faces.append({"face_id": face_id, "embeddings": embeddings}) return faces + except sqlite3.Error as e: + print(f"Error getting unassigned faces: {e}") + return [] finally: - conn.close() + if conn is not None: + conn.close() def db_get_all_faces_with_cluster_names() -> ( @@ -286,10 +330,11 @@ def db_get_all_faces_with_cluster_names() -> ( Returns: List of dictionaries containing face_id, embeddings (as numpy array), and cluster_name """ - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - + conn = None try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + cursor.execute( """ SELECT f.face_id, f.embeddings, fc.cluster_name @@ -315,8 +360,12 @@ def db_get_all_faces_with_cluster_names() -> ( ) return faces + except sqlite3.Error as e: + print(f"Error getting faces with cluster names: {e}") + return [] finally: - conn.close() + if conn is not None: + conn.close() def db_update_face_cluster_ids_batch( @@ -343,7 +392,7 @@ def db_update_face_cluster_ids_batch( conn = None own_connection = cursor is None - + if own_connection: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() @@ -371,10 +420,10 @@ def db_update_face_cluster_ids_batch( if own_connection and conn: conn.commit() - except Exception: + except sqlite3.Error as e: if own_connection and conn: conn.rollback() - print("Error updating face cluster IDs in batch.") + print(f"Error updating face cluster IDs in batch: {e}") raise finally: if own_connection and conn: @@ -389,10 +438,11 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] List of dictionaries containing cluster_id and mean_embedding (as numpy array) Only returns clusters that have at least one face assigned """ - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - + conn = None try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + cursor.execute( """ SELECT f.cluster_id, f.embeddings @@ -430,5 +480,9 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] ) return cluster_means + except sqlite3.Error as e: + print(f"Error getting cluster mean embeddings: {e}") + return [] finally: - conn.close() \ No newline at end of file + if conn is not None: + conn.close() \ No newline at end of file diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 8924b012a..ab218ecaf 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -73,7 +73,6 @@ "@vitejs/plugin-react": "^4.2.1", "autoprefixer": "^10.4.20", "babel-jest": "^29.7.0", - "baseline-browser-mapping": "^2.9.14", "eslint": "^8.57.1", "eslint-config-prettier": "^9.1.0", "eslint-config-react-app": "^7.0.1", @@ -6589,9 +6588,9 @@ "license": "MIT" }, "node_modules/baseline-browser-mapping": { - "version": "2.9.14", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.14.tgz", - "integrity": "sha512-B0xUquLkiGLgHhpPBqvl7GWegWBUNuujQ6kXd/r1U38ElPT6Ok8KZ8e+FpUGEc2ZoRQUzq/aUnaKFc/svWUGSg==", + "version": "2.8.20", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.20.tgz", + "integrity": "sha512-JMWsdF+O8Orq3EMukbUN1QfbLK9mX2CkUmQBcW2T0s8OmdAUL5LLM/6wFwSrqXzlXB13yhyK9gTKS1rIizOduQ==", "dev": true, "license": "Apache-2.0", "bin": { diff --git a/frontend/package.json b/frontend/package.json index 695b7e1d6..0a53f1b8d 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -88,7 +88,6 @@ "@vitejs/plugin-react": "^4.2.1", "autoprefixer": "^10.4.20", "babel-jest": "^29.7.0", - "baseline-browser-mapping": "^2.9.14", "eslint": "^8.57.1", "eslint-config-prettier": "^9.1.0", "eslint-config-react-app": "^7.0.1", From 4cb7a38291f5fbb854e7b899f7b749073ba860e5 Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 12:33:17 +0530 Subject: [PATCH 4/7] Fix: correct logic for all faces and revert frontend deps --- backend/app/database/faces.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index 66f0abaac..7e7986182 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -128,7 +128,7 @@ def db_insert_face_embeddings_by_image_id( FaceId or list of FaceIds. Can be None if insert failed. """ - # Handle multiple faces in one image + # Handle multiple faces in one image (Check if it's a list of numpy arrays) if ( isinstance(embeddings, list) and len(embeddings) > 0 @@ -190,19 +190,17 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: Get all face embeddings with associated image data. Returns: - List of dictionaries, one per face, containing face embeddings, bbox, - face_id, and associated image information including tags. - - Note: - This function returns one entry per face. Images with multiple faces - will have multiple entries with the same image_id but different face data. + List of dictionaries, one per face. + Note: Images with multiple faces will appear multiple times, + once for each face entry. """ conn = None try: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() - # Get all faces with their image data + # Step 1: Get all faces with their image data + # We explicitly select face_id to ensure uniqueness cursor.execute( """ SELECT @@ -222,7 +220,8 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: ) face_results = cursor.fetchall() - # Get tags for all images that have faces + # Step 2: Get tags for all images that have faces + # We do this separately to avoid a Cartesian product in the main query cursor.execute( """ SELECT DISTINCT i.id, m.name as tag_name @@ -237,7 +236,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: from app.utils.images import image_util_parse_metadata - # Build a mapping of image_id to tags + # Build a mapping of image_id -> list of tags image_tags: Dict[str, List[str]] = {} for image_id, tag_name in tag_results: if image_id not in image_tags: @@ -245,6 +244,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: if tag_name not in image_tags[image_id]: image_tags[image_id].append(tag_name) + # Step 3: Construct the result list (one entry per face) faces: List[Dict[str, Any]] = [] for ( face_id, @@ -263,6 +263,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: except json.JSONDecodeError: continue + # Attach tags belonging to this image tags = image_tags.get(image_id) if tags is not None and len(tags) == 0: tags = None @@ -271,6 +272,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: "face_id": face_id, "embeddings": embeddings_json, "bbox": bbox_json, + # Image Metadata "id": image_id, "path": path, "folder_id": folder_id, @@ -379,13 +381,6 @@ def db_update_face_cluster_ids_batch( face_cluster_mapping: List of dictionaries containing face_id and cluster_id pairs Each dict should have keys: 'face_id' and 'cluster_id' cursor: Optional existing database cursor. If None, creates a new connection. - - Example: - face_cluster_mapping = [ - {'face_id': 1, 'cluster_id': 1}, - {'face_id': 2, 'cluster_id': 2}, - {'face_id': 3, 'cluster_id': None} # To unassign cluster - ] """ if not face_cluster_mapping: return From 630a0fb6350b042e2e87c4dd1a425d4d6ddd32fe Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 14:19:07 +0530 Subject: [PATCH 5/7] Fix: Handle 2D numpy arrays, enable Foreign Keys, and prevent Null crashes --- backend/app/database/faces.py | 438 ++++++++++++++++++++++++++++++++++ 1 file changed, 438 insertions(+) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index 7e7986182..c9127fc19 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -14,7 +14,445 @@ class FaceData(TypedDict): """Represents the full faces table structure""" +import sqlite3 +import json +import numpy as np +from typing import Optional, List, Dict, Union, TypedDict, Any +from app.config.settings import DATABASE_PATH + +# Type definitions +FaceId = int +ImageId = str +ClusterId = str +FaceEmbedding = np.ndarray # 512-dim vector +BoundingBox = Dict[str, int] # {'x': int, 'y': int, 'width': int, 'height': int} + + +class FaceData(TypedDict): + face_id: FaceId + image_id: ImageId + embeddings: FaceEmbedding + confidence: Optional[float] + bbox: Optional[BoundingBox] + cluster_id: Optional[ClusterId] + + +def get_db_conn(): + """Helper to get connection with Foreign Keys enabled.""" + conn = sqlite3.connect(DATABASE_PATH) + conn.execute("PRAGMA foreign_keys = ON") + return conn + + +def db_create_faces_table() -> None: + """Create the faces table if it doesn't exist.""" + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS faces ( + face_id INTEGER PRIMARY KEY AUTOINCREMENT, + image_id TEXT NOT NULL, + cluster_id TEXT, + embeddings JSON NOT NULL, + confidence REAL, + bbox JSON, + FOREIGN KEY(image_id) REFERENCES images(id) ON DELETE CASCADE + ) + """ + ) + conn.commit() + except sqlite3.Error as e: + print(f"Error creating faces table: {e}") + raise + finally: + if conn is not None: + conn.close() + +def db_insert_face_embeddings( + image_id: ImageId, + embeddings: FaceEmbedding, + confidence: Optional[float] = None, + bbox: Optional[BoundingBox] = None, + cluster_id: Optional[ClusterId] = None, +) -> Optional[FaceId]: + """ + Insert face embeddings with additional metadata. + """ + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + + embeddings_json = json.dumps(embeddings.tolist()) + bbox_json = json.dumps(bbox) if bbox is not None else None + + cursor.execute( + """ + INSERT INTO faces (image_id, cluster_id, embeddings, confidence, bbox) + VALUES (?, ?, ?, ?, ?) + """, + (image_id, cluster_id, embeddings_json, confidence, bbox_json), + ) + + face_id = cursor.lastrowid + conn.commit() + return face_id + except sqlite3.Error as e: + print(f"Error inserting face embeddings: {e}") + if conn: + conn.rollback() + return None + finally: + if conn is not None: + conn.close() + + +def db_insert_face_embeddings_by_image_id( + image_id: ImageId, + embeddings: Union[FaceEmbedding, List[FaceEmbedding]], + confidence: Optional[Union[float, List[Optional[float]]]] = None, + bbox: Optional[Union[BoundingBox, List[Optional[BoundingBox]]]] = None, + cluster_id: Optional[Union[ClusterId, List[Optional[ClusterId]]]] = None, +) -> Union[Optional[FaceId], List[Optional[FaceId]]]: + """ + Insert face embeddings with robust input handling. + Safely handles single arrays, lists of arrays, and 2D numpy arrays. + """ + + # 1. Handle Empty List (Prevent Crash) + if isinstance(embeddings, list) and len(embeddings) == 0: + return [] + + # 2. Handle 2D Numpy Array (Fixes CodeRabbit Issue) + # If input is (N, 512), convert it to list of N arrays + if isinstance(embeddings, np.ndarray) and embeddings.ndim == 2: + embeddings = list(embeddings) + + # Check if we are handling a list of embeddings + is_list_input = isinstance(embeddings, list) and len(embeddings) > 0 and isinstance(embeddings[0], np.ndarray) + + if is_list_input: + face_ids: List[Optional[FaceId]] = [] + for i, emb in enumerate(embeddings): + # Extract single confidence value safely + conf: Optional[float] = None + if isinstance(confidence, list) and i < len(confidence): + conf = confidence[i] + elif isinstance(confidence, (int, float)): + conf = float(confidence) + + # Extract single bbox value safely + bb: Optional[BoundingBox] = None + if isinstance(bbox, list) and i < len(bbox): + bb = bbox[i] + elif isinstance(bbox, dict): + bb = bbox + + # Extract single cluster_id value safely + cid: Optional[ClusterId] = None + if isinstance(cluster_id, list) and i < len(cluster_id): + cid = cluster_id[i] + elif isinstance(cluster_id, str): + cid = cluster_id + + face_id = db_insert_face_embeddings(image_id, emb, conf, bb, cid) + face_ids.append(face_id) + return face_ids + else: + # Single face - extract single values from potential lists + single_conf: Optional[float] = None + if isinstance(confidence, list) and len(confidence) > 0: + single_conf = confidence[0] + elif isinstance(confidence, (int, float)): + single_conf = float(confidence) + + single_bbox: Optional[BoundingBox] = None + if isinstance(bbox, list) and len(bbox) > 0: + single_bbox = bbox[0] + elif isinstance(bbox, dict): + single_bbox = bbox + + single_cid: Optional[ClusterId] = None + if isinstance(cluster_id, list) and len(cluster_id) > 0: + single_cid = cluster_id[0] + elif isinstance(cluster_id, str): + single_cid = cluster_id + + return db_insert_face_embeddings( + image_id, embeddings, single_conf, single_bbox, single_cid + ) + + +def get_all_face_embeddings() -> List[Dict[str, Any]]: + """ + Get all face embeddings with associated image data. + Filters out corrupted records where embeddings are missing. + """ + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + + # Step 1: Get all faces with their image data + cursor.execute( + """ + SELECT + f.face_id, + f.embeddings, + f.bbox, + i.id, + i.path, + i.folder_id, + i.thumbnailPath, + i.metadata, + i.isTagged + FROM faces f + JOIN images i ON f.image_id = i.id + ORDER BY i.path, f.face_id + """ + ) + face_results = cursor.fetchall() + + # Step 2: Get tags for all images that have faces + cursor.execute( + """ + SELECT DISTINCT i.id, m.name as tag_name + FROM faces f + JOIN images i ON f.image_id = i.id + LEFT JOIN image_classes ic ON i.id = ic.image_id + LEFT JOIN mappings m ON ic.class_id = m.class_id + WHERE m.name IS NOT NULL + """ + ) + tag_results = cursor.fetchall() + + from app.utils.images import image_util_parse_metadata + + # Build a mapping of image_id -> list of tags + image_tags: Dict[str, List[str]] = {} + for image_id, tag_name in tag_results: + if image_id not in image_tags: + image_tags[image_id] = [] + if tag_name not in image_tags[image_id]: + image_tags[image_id].append(tag_name) + + # Step 3: Construct the result list + faces: List[Dict[str, Any]] = [] + for ( + face_id, + embeddings, + bbox, + image_id, + path, + folder_id, + thumbnail_path, + metadata, + is_tagged, + ) in face_results: + try: + # CRITICAL FIX: Handle Null/Empty embeddings to prevent downstream crashes + if not embeddings: + continue + + embeddings_json = json.loads(embeddings) + bbox_json = json.loads(bbox) if bbox else None + + if embeddings_json is None: + continue + + except json.JSONDecodeError: + print(f"Error decoding JSON for face {face_id}") + continue + + # Attach tags belonging to this image + tags = image_tags.get(image_id) + if tags is not None and len(tags) == 0: + tags = None + + faces.append({ + "face_id": face_id, + "embeddings": embeddings_json, + "bbox": bbox_json, + # Image Metadata + "id": image_id, + "path": path, + "folder_id": folder_id, + "thumbnailPath": thumbnail_path, + "metadata": image_util_parse_metadata(metadata), + "isTagged": bool(is_tagged), + "tags": tags, + }) + + return faces + except sqlite3.Error as e: + print(f"Error getting face embeddings: {e}") + return [] + finally: + if conn is not None: + conn.close() + + +def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbedding]]]: + """Get faces that haven't been assigned to a cluster yet.""" + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + + cursor.execute("SELECT face_id, embeddings FROM faces WHERE cluster_id IS NULL") + rows = cursor.fetchall() + + faces: List[Dict[str, Union[FaceId, FaceEmbedding]]] = [] + for row in rows: + face_id, embeddings_json = row + if not embeddings_json: + continue + embeddings = np.array(json.loads(embeddings_json)) + faces.append({"face_id": face_id, "embeddings": embeddings}) + + return faces + except sqlite3.Error as e: + print(f"Error getting unassigned faces: {e}") + return [] + finally: + if conn is not None: + conn.close() + + +def db_get_all_faces_with_cluster_names() -> List[Dict[str, Union[FaceId, FaceEmbedding, Optional[str]]]]: + """Get all faces with their cluster names.""" + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + + cursor.execute( + """ + SELECT f.face_id, f.embeddings, fc.cluster_name + FROM faces f + LEFT JOIN face_clusters fc ON f.cluster_id = fc.cluster_id + """ + ) + rows = cursor.fetchall() + + faces: List[Dict[str, Union[FaceId, FaceEmbedding, Optional[str]]]] = [] + for row in rows: + face_id, embeddings_json, cluster_name = row + if not embeddings_json: + continue + embeddings = np.array(json.loads(embeddings_json)) + faces.append( + { + "face_id": face_id, + "embeddings": embeddings, + "cluster_name": cluster_name, + } + ) + + return faces + except sqlite3.Error as e: + print(f"Error getting faces with cluster names: {e}") + return [] + finally: + if conn is not None: + conn.close() + + +def db_update_face_cluster_ids_batch( + face_cluster_mapping: List[Dict[str, Union[FaceId, ClusterId, None]]], + cursor: Optional[sqlite3.Cursor] = None, +) -> None: + """Update cluster IDs for multiple faces in batch.""" + if not face_cluster_mapping: + return + + conn = None + own_connection = cursor is None + + if own_connection: + conn = get_db_conn() + cursor = conn.cursor() + + if cursor is None: + raise ValueError("Database cursor is required") + + try: + update_data: List[tuple] = [] + for mapping in face_cluster_mapping: + face_id = mapping.get("face_id") + cluster_id = mapping.get("cluster_id") + update_data.append((cluster_id, face_id)) + + cursor.executemany( + """ + UPDATE faces + SET cluster_id = ? + WHERE face_id = ? + """, + update_data, + ) + + if own_connection and conn: + conn.commit() + except sqlite3.Error as e: + if own_connection and conn: + conn.rollback() + print(f"Error updating face cluster IDs in batch: {e}") + raise + finally: + if own_connection and conn: + conn.close() + + +def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding]]]: + """Get mean embeddings for each cluster.""" + conn = None + try: + conn = get_db_conn() + cursor = conn.cursor() + + cursor.execute( + """ + SELECT f.cluster_id, f.embeddings + FROM faces f + WHERE f.cluster_id IS NOT NULL + """ + ) + rows = cursor.fetchall() + + if not rows: + return [] + + cluster_embeddings: Dict[str, List[np.ndarray]] = {} + for row in rows: + cluster_id, embeddings_json = row + if not embeddings_json: + continue + embeddings = np.array(json.loads(embeddings_json)) + + if cluster_id not in cluster_embeddings: + cluster_embeddings[cluster_id] = [] + cluster_embeddings[cluster_id].append(embeddings) + + cluster_means: List[Dict[str, Union[int, FaceEmbedding]]] = [] + for cluster_id, embeddings_list in cluster_embeddings.items(): + stacked_embeddings = np.stack(embeddings_list) + mean_embedding = np.mean(stacked_embeddings, axis=0) + cluster_means.append( + {"cluster_id": cluster_id, "mean_embedding": mean_embedding} + ) + + return cluster_means + except sqlite3.Error as e: + print(f"Error getting cluster mean embeddings: {e}") + return [] + finally: + if conn is not None: + conn.close() face_id: FaceId image_id: ImageId cluster_id: Optional[ClusterId] From 142e23a9e9193cd95e9c28d9f561cb541d11f789 Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 15:58:37 +0530 Subject: [PATCH 6/7] Fix: Remove duplicate code block and finalize Faces DB logic --- backend/app/database/faces.py | 489 +--------------------------------- 1 file changed, 6 insertions(+), 483 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index c9127fc19..cb4cf5e02 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -7,28 +7,13 @@ # Type definitions FaceId = int ImageId = str -ClusterId = int -BoundingBox = Dict[str, Union[int, float]] +ClusterId = str # Consistent with TEXT in DB FaceEmbedding = np.ndarray +BoundingBox = Dict[str, Union[int, float]] class FaceData(TypedDict): """Represents the full faces table structure""" -import sqlite3 -import json -import numpy as np -from typing import Optional, List, Dict, Union, TypedDict, Any -from app.config.settings import DATABASE_PATH - -# Type definitions -FaceId = int -ImageId = str -ClusterId = str -FaceEmbedding = np.ndarray # 512-dim vector -BoundingBox = Dict[str, int] # {'x': int, 'y': int, 'width': int, 'height': int} - - -class FaceData(TypedDict): face_id: FaceId image_id: ImageId embeddings: FaceEmbedding @@ -198,6 +183,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: cursor = conn.cursor() # Step 1: Get all faces with their image data + # We explicitly select face_id to ensure uniqueness cursor.execute( """ SELECT @@ -218,6 +204,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: face_results = cursor.fetchall() # Step 2: Get tags for all images that have faces + # We do this separately to avoid a Cartesian product in the main query cursor.execute( """ SELECT DISTINCT i.id, m.name as tag_name @@ -240,7 +227,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: if tag_name not in image_tags[image_id]: image_tags[image_id].append(tag_name) - # Step 3: Construct the result list + # Step 3: Construct the result list (one entry per face) faces: List[Dict[str, Any]] = [] for ( face_id, @@ -377,6 +364,7 @@ def db_update_face_cluster_ids_batch( conn = get_db_conn() cursor = conn.cursor() + # At this point cursor should never be None if cursor is None: raise ValueError("Database cursor is required") @@ -438,476 +426,11 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] cluster_embeddings[cluster_id] = [] cluster_embeddings[cluster_id].append(embeddings) - cluster_means: List[Dict[str, Union[int, FaceEmbedding]]] = [] - for cluster_id, embeddings_list in cluster_embeddings.items(): - stacked_embeddings = np.stack(embeddings_list) - mean_embedding = np.mean(stacked_embeddings, axis=0) - cluster_means.append( - {"cluster_id": cluster_id, "mean_embedding": mean_embedding} - ) - - return cluster_means - except sqlite3.Error as e: - print(f"Error getting cluster mean embeddings: {e}") - return [] - finally: - if conn is not None: - conn.close() - face_id: FaceId - image_id: ImageId - cluster_id: Optional[ClusterId] - embeddings: FaceEmbedding # Numpy array in application, stored as JSON string in DB - confidence: Optional[float] - bbox: Optional[BoundingBox] - - -FaceClusterMapping = Dict[FaceId, Optional[ClusterId]] - - -def db_create_faces_table() -> None: - """Create the faces table if it doesn't exist.""" - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - conn.execute("PRAGMA foreign_keys = ON") - cursor = conn.cursor() - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS faces ( - face_id INTEGER PRIMARY KEY AUTOINCREMENT, - image_id TEXT, - cluster_id INTEGER, - embeddings TEXT, - confidence REAL, - bbox TEXT, - FOREIGN KEY (image_id) REFERENCES images(id) ON DELETE CASCADE, - FOREIGN KEY (cluster_id) REFERENCES face_clusters(cluster_id) ON DELETE SET NULL - ) - """ - ) - conn.commit() - except sqlite3.Error as e: - print(f"Error creating faces table: {e}") - raise - finally: - if conn is not None: - conn.close() - - -def db_insert_face_embeddings( - image_id: ImageId, - embeddings: FaceEmbedding, - confidence: Optional[float] = None, - bbox: Optional[BoundingBox] = None, - cluster_id: Optional[ClusterId] = None, -) -> Optional[FaceId]: - """ - Insert face embeddings with additional metadata. - - Args: - image_id: ID of the image this face belongs to - embeddings: Face embedding vector (numpy array) - confidence: Confidence score for face detection (optional) - bbox: Bounding box coordinates as dict with keys: x, y, width, height (optional) - cluster_id: ID of the face cluster this face belongs to (optional) - - Returns: - FaceId if successful, None if insert failed - """ - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - embeddings_json = json.dumps(embeddings.tolist()) - - # Convert bbox to JSON string if provided - bbox_json = json.dumps(bbox) if bbox is not None else None - - cursor.execute( - """ - INSERT INTO faces (image_id, cluster_id, embeddings, confidence, bbox) - VALUES (?, ?, ?, ?, ?) - """, - (image_id, cluster_id, embeddings_json, confidence, bbox_json), - ) - - face_id = cursor.lastrowid - conn.commit() - return face_id - except sqlite3.Error as e: - print(f"Error inserting face embeddings: {e}") - if conn: - conn.rollback() - return None - finally: - if conn is not None: - conn.close() - - -def db_insert_face_embeddings_by_image_id( - image_id: ImageId, - embeddings: Union[FaceEmbedding, List[FaceEmbedding]], - confidence: Optional[Union[float, List[float]]] = None, - bbox: Optional[Union[BoundingBox, List[BoundingBox]]] = None, - cluster_id: Optional[Union[ClusterId, List[ClusterId]]] = None, -) -> Union[Optional[FaceId], List[Optional[FaceId]]]: - """ - Insert face embeddings using image path (convenience function). - - Args: - image_id: Image ID (uuid string) - embeddings: Face embedding vector (numpy array) or list of embeddings - confidence: Confidence score(s) for face detection (optional) - bbox: Bounding box coordinates or list of bounding boxes (optional) - cluster_id: Cluster ID(s) for the face(s) (optional) - - Returns: - FaceId or list of FaceIds. Can be None if insert failed. - """ - - # Handle multiple faces in one image (Check if it's a list of numpy arrays) - if ( - isinstance(embeddings, list) - and len(embeddings) > 0 - and isinstance(embeddings[0], np.ndarray) - ): - face_ids: List[Optional[FaceId]] = [] - for i, emb in enumerate(embeddings): - # Extract single confidence value - conf: Optional[float] = None - if isinstance(confidence, list) and i < len(confidence): - conf = confidence[i] - elif isinstance(confidence, (int, float)): - conf = float(confidence) - - # Extract single bbox value - bb: Optional[BoundingBox] = None - if isinstance(bbox, list) and i < len(bbox): - bb = bbox[i] - elif isinstance(bbox, dict): - bb = bbox - - # Extract single cluster_id value - cid: Optional[ClusterId] = None - if isinstance(cluster_id, list) and i < len(cluster_id): - cid = cluster_id[i] - elif isinstance(cluster_id, int): - cid = cluster_id - - face_id = db_insert_face_embeddings(image_id, emb, conf, bb, cid) - face_ids.append(face_id) - return face_ids - else: - # Single face - extract single values from potential lists - single_conf: Optional[float] = None - if isinstance(confidence, list) and len(confidence) > 0: - single_conf = confidence[0] - elif isinstance(confidence, (int, float)): - single_conf = float(confidence) - - single_bbox: Optional[BoundingBox] = None - if isinstance(bbox, list) and len(bbox) > 0: - single_bbox = bbox[0] - elif isinstance(bbox, dict): - single_bbox = bbox - - single_cid: Optional[ClusterId] = None - if isinstance(cluster_id, list) and len(cluster_id) > 0: - single_cid = cluster_id[0] - elif isinstance(cluster_id, int): - single_cid = cluster_id - - return db_insert_face_embeddings( - image_id, embeddings, single_conf, single_bbox, single_cid - ) - - -def get_all_face_embeddings() -> List[Dict[str, Any]]: - """ - Get all face embeddings with associated image data. - - Returns: - List of dictionaries, one per face. - Note: Images with multiple faces will appear multiple times, - once for each face entry. - """ - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - # Step 1: Get all faces with their image data - # We explicitly select face_id to ensure uniqueness - cursor.execute( - """ - SELECT - f.face_id, - f.embeddings, - f.bbox, - i.id, - i.path, - i.folder_id, - i.thumbnailPath, - i.metadata, - i.isTagged - FROM faces f - JOIN images i ON f.image_id = i.id - ORDER BY i.path, f.face_id - """ - ) - face_results = cursor.fetchall() - - # Step 2: Get tags for all images that have faces - # We do this separately to avoid a Cartesian product in the main query - cursor.execute( - """ - SELECT DISTINCT i.id, m.name as tag_name - FROM faces f - JOIN images i ON f.image_id = i.id - LEFT JOIN image_classes ic ON i.id = ic.image_id - LEFT JOIN mappings m ON ic.class_id = m.class_id - WHERE m.name IS NOT NULL - """ - ) - tag_results = cursor.fetchall() - - from app.utils.images import image_util_parse_metadata - - # Build a mapping of image_id -> list of tags - image_tags: Dict[str, List[str]] = {} - for image_id, tag_name in tag_results: - if image_id not in image_tags: - image_tags[image_id] = [] - if tag_name not in image_tags[image_id]: - image_tags[image_id].append(tag_name) - - # Step 3: Construct the result list (one entry per face) - faces: List[Dict[str, Any]] = [] - for ( - face_id, - embeddings, - bbox, - image_id, - path, - folder_id, - thumbnail_path, - metadata, - is_tagged, - ) in face_results: - try: - embeddings_json = json.loads(embeddings) if embeddings else None - bbox_json = json.loads(bbox) if bbox else None - except json.JSONDecodeError: - continue - - # Attach tags belonging to this image - tags = image_tags.get(image_id) - if tags is not None and len(tags) == 0: - tags = None - - faces.append({ - "face_id": face_id, - "embeddings": embeddings_json, - "bbox": bbox_json, - # Image Metadata - "id": image_id, - "path": path, - "folder_id": folder_id, - "thumbnailPath": thumbnail_path, - "metadata": image_util_parse_metadata(metadata), - "isTagged": bool(is_tagged), - "tags": tags, - }) - - return faces - except sqlite3.Error as e: - print(f"Error getting face embeddings: {e}") - return [] - finally: - if conn is not None: - conn.close() - - -def db_get_faces_unassigned_clusters() -> List[Dict[str, Union[FaceId, FaceEmbedding]]]: - """ - Get all faces that don't have assigned clusters. - - Returns: - List of dictionaries containing face_id and embeddings (as numpy array) - """ - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - cursor.execute("SELECT face_id, embeddings FROM faces WHERE cluster_id IS NULL") - - rows = cursor.fetchall() - - faces: List[Dict[str, Union[FaceId, FaceEmbedding]]] = [] - for row in rows: - face_id, embeddings_json = row - # Convert JSON string back to numpy array - embeddings = np.array(json.loads(embeddings_json)) - faces.append({"face_id": face_id, "embeddings": embeddings}) - - return faces - except sqlite3.Error as e: - print(f"Error getting unassigned faces: {e}") - return [] - finally: - if conn is not None: - conn.close() - - -def db_get_all_faces_with_cluster_names() -> ( - List[Dict[str, Union[FaceId, FaceEmbedding, Optional[str]]]] -): - """ - Get all faces with their corresponding cluster names. - - Returns: - List of dictionaries containing face_id, embeddings (as numpy array), and cluster_name - """ - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - cursor.execute( - """ - SELECT f.face_id, f.embeddings, fc.cluster_name - FROM faces f - LEFT JOIN face_clusters fc ON f.cluster_id = fc.cluster_id - ORDER BY f.face_id - """ - ) - - rows = cursor.fetchall() - - faces: List[Dict[str, Union[FaceId, FaceEmbedding, Optional[str]]]] = [] - for row in rows: - face_id, embeddings_json, cluster_name = row - # Convert JSON string back to numpy array - embeddings = np.array(json.loads(embeddings_json)) - faces.append( - { - "face_id": face_id, - "embeddings": embeddings, - "cluster_name": cluster_name, - } - ) - - return faces - except sqlite3.Error as e: - print(f"Error getting faces with cluster names: {e}") - return [] - finally: - if conn is not None: - conn.close() - - -def db_update_face_cluster_ids_batch( - face_cluster_mapping: List[Dict[str, Union[FaceId, ClusterId, None]]], - cursor: Optional[sqlite3.Cursor] = None, -) -> None: - """ - Update cluster IDs for multiple faces in batch. - - Args: - face_cluster_mapping: List of dictionaries containing face_id and cluster_id pairs - Each dict should have keys: 'face_id' and 'cluster_id' - cursor: Optional existing database cursor. If None, creates a new connection. - """ - if not face_cluster_mapping: - return - - conn = None - own_connection = cursor is None - - if own_connection: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - # At this point cursor should never be None - if cursor is None: - raise ValueError("Database cursor is required") - - try: - # Prepare update data as tuples (cluster_id, face_id) - update_data: List[tuple] = [] - for mapping in face_cluster_mapping: - face_id = mapping.get("face_id") - cluster_id = mapping.get("cluster_id") - update_data.append((cluster_id, face_id)) - - cursor.executemany( - """ - UPDATE faces - SET cluster_id = ? - WHERE face_id = ? - """, - update_data, - ) - - if own_connection and conn: - conn.commit() - except sqlite3.Error as e: - if own_connection and conn: - conn.rollback() - print(f"Error updating face cluster IDs in batch: {e}") - raise - finally: - if own_connection and conn: - conn.close() - - -def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding]]]: - """ - Get cluster IDs and their corresponding mean face embeddings. - - Returns: - List of dictionaries containing cluster_id and mean_embedding (as numpy array) - Only returns clusters that have at least one face assigned - """ - conn = None - try: - conn = sqlite3.connect(DATABASE_PATH) - cursor = conn.cursor() - - cursor.execute( - """ - SELECT f.cluster_id, f.embeddings - FROM faces f - WHERE f.cluster_id IS NOT NULL - ORDER BY f.cluster_id - """ - ) - - rows = cursor.fetchall() - - if not rows: - return [] - - # Group embeddings by cluster_id - cluster_embeddings: Dict[int, List[np.ndarray]] = {} - for row in rows: - cluster_id, embeddings_json = row - # Convert JSON string back to numpy array - embeddings = np.array(json.loads(embeddings_json)) - - if cluster_id not in cluster_embeddings: - cluster_embeddings[cluster_id] = [] - cluster_embeddings[cluster_id].append(embeddings) - - # Calculate mean embeddings for each cluster cluster_means: List[Dict[str, Union[int, FaceEmbedding]]] = [] for cluster_id, embeddings_list in cluster_embeddings.items(): # Stack all embeddings for this cluster and calculate mean stacked_embeddings = np.stack(embeddings_list) mean_embedding = np.mean(stacked_embeddings, axis=0) - cluster_means.append( {"cluster_id": cluster_id, "mean_embedding": mean_embedding} ) From 034b58c13256d92e3152e5bab7925597b3831ac3 Mon Sep 17 00:00:00 2001 From: Sandeep Chauhan Date: Tue, 13 Jan 2026 15:58:50 +0530 Subject: [PATCH 7/7] Fix: Remove duplicate code block and finalize Faces DB logic --- backend/app/database/faces.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/backend/app/database/faces.py b/backend/app/database/faces.py index cb4cf5e02..a51f31001 100644 --- a/backend/app/database/faces.py +++ b/backend/app/database/faces.py @@ -7,13 +7,12 @@ # Type definitions FaceId = int ImageId = str -ClusterId = str # Consistent with TEXT in DB -FaceEmbedding = np.ndarray -BoundingBox = Dict[str, Union[int, float]] +ClusterId = str +FaceEmbedding = np.ndarray # 512-dim vector +BoundingBox = Dict[str, int] # {'x': int, 'y': int, 'width': int, 'height': int} class FaceData(TypedDict): - """Represents the full faces table structure""" face_id: FaceId image_id: ImageId embeddings: FaceEmbedding @@ -183,7 +182,6 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: cursor = conn.cursor() # Step 1: Get all faces with their image data - # We explicitly select face_id to ensure uniqueness cursor.execute( """ SELECT @@ -193,8 +191,8 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: i.id, i.path, i.folder_id, - i.thumbnailPath, - i.metadata, + i.thumbnailPath, + i.metadata, i.isTagged FROM faces f JOIN images i ON f.image_id = i.id @@ -204,7 +202,6 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: face_results = cursor.fetchall() # Step 2: Get tags for all images that have faces - # We do this separately to avoid a Cartesian product in the main query cursor.execute( """ SELECT DISTINCT i.id, m.name as tag_name @@ -227,7 +224,7 @@ def get_all_face_embeddings() -> List[Dict[str, Any]]: if tag_name not in image_tags[image_id]: image_tags[image_id].append(tag_name) - # Step 3: Construct the result list (one entry per face) + # Step 3: Construct the result list faces: List[Dict[str, Any]] = [] for ( face_id, @@ -364,7 +361,6 @@ def db_update_face_cluster_ids_batch( conn = get_db_conn() cursor = conn.cursor() - # At this point cursor should never be None if cursor is None: raise ValueError("Database cursor is required") @@ -428,7 +424,6 @@ def db_get_cluster_mean_embeddings() -> List[Dict[str, Union[int, FaceEmbedding] cluster_means: List[Dict[str, Union[int, FaceEmbedding]]] = [] for cluster_id, embeddings_list in cluster_embeddings.items(): - # Stack all embeddings for this cluster and calculate mean stacked_embeddings = np.stack(embeddings_list) mean_embedding = np.mean(stacked_embeddings, axis=0) cluster_means.append(