Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
332 changes: 165 additions & 167 deletions django_app/poetry.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion django_app/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ duckdb = "^1.4.4"
opentelemetry-api = "1.25.0"
opentelemetry-sdk = "1.25.0"


[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
pytest-django = "^4.12.0"
Expand Down
1,263 changes: 720 additions & 543 deletions notebooks/poetry.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion notebooks/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ package-mode = false
python = ">=3.12,<3.13"
redbox = {path="../redbox", develop=false}
redbox_app = {path="../django_app", develop=false}
langchain-mcp-adapters = "^0.0.11"
django = "5.2.11"

[tool.poetry.group.dev.dependencies]
Expand Down
245 changes: 142 additions & 103 deletions redbox/poetry.lock

Large diffs are not rendered by default.

94 changes: 53 additions & 41 deletions redbox/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,59 +1,71 @@
[tool.ruff]
line-length = 120
target-version = 'py312'
target-version = "py312"

[tool.poetry]
name = "redbox"
version = "0.4.0"
description = "RAG application that uses GenAI to chat with and summarise civil service documents"
authors = ["DBT Redbox <redbox@businessandtrade.gov.uk>", "i.AI <i-dot-ai-enquiries@cabinetoffice.gov.uk>"]
authors = [
"DBT Redbox <redbox@businessandtrade.gov.uk>",
"i.AI <i-dot-ai-enquiries@cabinetoffice.gov.uk>"
]
license = "MIT"
readme = "../README.md"

[tool.poetry.dependencies]
python = ">=3.12,<3.13"
pydantic = "^2.7.1"
elasticsearch = "^8.15.0"
langchain-community = "0.3.27"
langchain = "^0.3.4"
langchain_openai = ">0.1.21"
boto3 = "^1.34.160"
pydantic-settings = "^2.3.4"
langchain-elasticsearch = ">0.2.2"
pytest-dotenv = "^0.5.2"
kneed = "^0.8.5"
langgraph = "^0.2.76"
langchain-aws = "^0.2.3"
wikipedia = "^1.4.0"
opensearch-py = "^2.7.1"
scikit-learn = "^1.5.2"
django-environ = "^0.12.0"
mohawk = "^1.1.0"
django-waffle = "^4.2.0"
langchain-mcp-adapters = "^0.0.11"
pymupdf = "^1.26.0"
ddtrace = "^3.10.0"

pydantic = "^2.7"
pydantic-settings = "^2.3"

langchain = "^1.2"
langchain-community = "^0.4.1"
langchain-openai = "^1.1"
langchain-aws = "^1.0.0"
langchain-elasticsearch = "^1.0.0"
langgraph = "^1.1"

elasticsearch = "^8.15"
opensearch-py = "^2.7"

boto3 = "^1.34"

starlette = "^0.49"

django = "^5.2"
django-environ = "^0.12"
django-waffle = "^4.2"
django-requestlogs = "^0.8"
django-log-formatter-asim = "^1.3.0"
pytest = "^8.4.2"
urllib3 = "2.6.3"
langchain-text-splitters = "0.3.9"
starlette = "0.49.1"
duckdb = "^1.4.3"
django = "5.2.11"
protobuf = "5.29.6"
pandas = "^2.3.3"
django-log-formatter-asim = "^1.3"

pandas = "^2.3"
scikit-learn = "^1.5"
duckdb = "^1.4"
pymupdf = "^1.26"
kneed = "^0.8"

mcp = "^1.7.1"

wikipedia = "^1.4"
mohawk = "^1.1"
ddtrace = "^3.10"

urllib3 = "^2.2"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
moto = "^5.0.12"
pytest-cov = "^5.0.0"
pytest-asyncio = "^0.23.6"
jsonlines = "^4.0.0"
deepeval = "^3.8.4"
pytest-mock = "^3.14.0"
boto3-stubs = {extras = ["essential"], version = "^1.35.28"}
requests-mock = "^1.12.1"
pytest = "^8.4"
pytest-cov = "^5.0"
pytest-asyncio = "^0.23"
pytest-dotenv = "^0.5"
pytest-mock = "^3.14"
requests-mock = "^1.12"

moto = "^5.0"
jsonlines = "^4.0"
deepeval = "^3.8"

boto3-stubs = { extras = ["essential"], version = "^1.35" }

[build-system]
requires = ["poetry-core"]
Expand Down
2 changes: 1 addition & 1 deletion redbox/redbox/chains/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from io import BytesIO
from typing import TYPE_CHECKING, Iterator

from langchain.vectorstores import VectorStore
from langchain_core.vectorstores import VectorStore
from langchain_core.documents.base import Document
from langchain_core.runnables import Runnable, RunnableLambda, chain

Expand Down
2 changes: 1 addition & 1 deletion redbox/redbox/graph/nodes/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import pandas as pd
from botocore.exceptions import EventStreamError
from langchain.schema import StrOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain_core.callbacks.manager import dispatch_custom_event
from langchain_core.documents import Document
from langchain_core.messages import AIMessage, HumanMessage, RemoveMessage
Expand Down
25 changes: 9 additions & 16 deletions redbox/redbox/graph/nodes/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import threading
import time
from io import StringIO
from typing import Annotated, Callable, Iterable, Literal, Union
from typing import Callable, Iterable, Literal, Union

import boto3
import duckdb
Expand All @@ -20,7 +20,6 @@
from langchain_core.embeddings.embeddings import Embeddings
from langchain_core.messages import ToolCall
from langchain_core.tools import Tool, tool
from langgraph.prebuilt import InjectedState
from mohawk import Sender
from opensearchpy import OpenSearch
from sklearn.metrics.pairwise import cosine_similarity
Expand Down Expand Up @@ -62,9 +61,7 @@ def format_result(loop, content, artifact, status, is_intermediate_step):

def build_document_from_prompt_tool(loop: bool = False):
@tool(response_format="content_and_artifact")
def _retrieve_document_from_prompt(
state: Annotated[RedboxState, InjectedState], is_intermediate_step: bool = False
) -> tuple:
def _retrieve_document_from_prompt(state: RedboxState, is_intermediate_step: bool = False) -> tuple:
"""
Retrieve document from user prompt

Expand All @@ -87,9 +84,7 @@ def _retrieve_document_from_prompt(

def build_retrieve_document_full_text(es_client: Union[Elasticsearch, OpenSearch], index_name: str, loop: bool = False):
@tool(response_format="content_and_artifact")
def _retrieve_document_full_text(
state: Annotated[RedboxState, InjectedState], is_intermediate_step: bool = False
) -> tuple:
def _retrieve_document_full_text(state: RedboxState, is_intermediate_step: bool = False) -> tuple:
"""
Retrieve full texts from state.documents. This tool should be used when a full text from a document is required.
This tool does not retrieve documents in knowledge base.
Expand Down Expand Up @@ -156,7 +151,7 @@ def query_repo(el_query, is_intermediate_step, loop):

@tool(response_format="content_and_artifact")
def _retrieve_specific_file_knowledge_base(
state: Annotated[RedboxState, InjectedState],
state: RedboxState,
uri: str,
) -> tuple[str, list[Document]]:
"""
Expand All @@ -171,9 +166,7 @@ def _retrieve_specific_file_knowledge_base(
return query_repo(el_query, is_intermediate_step=False, loop=False)

@tool(response_format="content_and_artifact")
def _retrieve_knowledge_base(
state: Annotated[RedboxState, InjectedState], is_intermediate_step: bool = False
) -> tuple:
def _retrieve_knowledge_base(state: RedboxState, is_intermediate_step: bool = False) -> tuple:
"""
Retrieve full texts from all knowledge base files.

Expand Down Expand Up @@ -251,7 +244,7 @@ def search_repo(query, selected_files, permitted_files, ai_settings, start_time=
return format_documents(sorted_documents), sorted_documents

@tool(response_format="content_and_artifact")
def _search_documents(query: str, state: Annotated[RedboxState, InjectedState]) -> tuple[str, list[Document]]:
def _search_documents(query: str, state: RedboxState) -> tuple[str, list[Document]]:
"""
"Searches through state.documents to find and extract relevant information. This tool should be used whenever a query involves finding, searching, or retrieving information from documents that have already been uploaded or provided to the system.

Expand All @@ -273,7 +266,7 @@ def _search_documents(query: str, state: Annotated[RedboxState, InjectedState])
)

@tool(response_format="content_and_artifact")
def _search_knowledge_base(query: str, state: Annotated[RedboxState, InjectedState]) -> tuple[str, list[Document]]:
def _search_knowledge_base(query: str, state: RedboxState) -> tuple[str, list[Document]]:
"""
"Searches through knowledge base files to find and extract relevant information. This tool should be used whenever a query involves finding, searching, or retrieving information from knowledge base.

Expand Down Expand Up @@ -396,7 +389,7 @@ def build_query_tabular_file_tool(
def _query_tabular_file(
sql_query: str,
uri: str,
state: Annotated[RedboxState, InjectedState],
state: RedboxState,
) -> tuple[str, list[Document]]:
"""
Executes the SQL query against tabular files retrieved by the embedded retriever.
Expand Down Expand Up @@ -509,7 +502,7 @@ def recalculate_similarity(response, query, num_results):
return response

@tool(response_format="content_and_artifact")
def _search_govuk(query: str, state: Annotated[RedboxState, InjectedState]) -> tuple[str, list[Document]]:
def _search_govuk(query: str, state: RedboxState) -> tuple[str, list[Document]]:
"""
Search for documents on www.gov.uk based on a query string.
This endpoint is used to search for documents on www.gov.uk. There are many types of documents on www.gov.uk.
Expand Down
10 changes: 5 additions & 5 deletions redbox/redbox/graph/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from langchain_core.messages import AIMessage
from langchain_core.vectorstores import VectorStoreRetriever
from langgraph.graph import END, START, StateGraph
from langgraph.graph.graph import CompiledGraph
from langgraph.pregel import RetryPolicy
from langgraph.graph.state import CompiledStateGraph
from langgraph.types import RetryPolicy

from redbox.chains.components import get_structured_response_with_citations_parser
from redbox.chains.runnables import build_self_route_output_parser
Expand Down Expand Up @@ -134,7 +134,7 @@ def get_search_graph(
prompt_set: PromptSet = PromptSet.Search,
debug: bool = False,
final_sources: bool = True,
) -> CompiledGraph:
) -> CompiledStateGraph:
"""Creates a subgraph for retrieval augmented generation (RAG)."""
citations_output_parser, format_instructions = get_structured_response_with_citations_parser()

Expand Down Expand Up @@ -376,7 +376,7 @@ def get_summarise_graph(

def get_chat_graph(
debug: bool = False,
) -> CompiledGraph:
) -> CompiledStateGraph:
"""Creates a subgraph for standard chat."""
builder = StateGraph(RedboxState)

Expand Down Expand Up @@ -441,7 +441,7 @@ def build_new_route_graph(
all_chunks_retriever: VectorStoreRetriever,
agent_configs: Dict[str, AgentConfig],
debug: bool = False,
) -> CompiledGraph:
) -> CompiledStateGraph:
def update_submission_eval(state: RedboxState):
state.tasks_evaluator = EVAL_SUBMISSION
return state
Expand Down
2 changes: 1 addition & 1 deletion redbox/redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import boto3
from elasticsearch import Elasticsearch
from langchain.globals import set_debug
from langchain_core.globals import set_debug
from opensearchpy import OpenSearch, RequestsHttpConnection
from pydantic import AnyUrl, BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict
Expand Down
Loading
Loading