diff --git a/functions/package.json b/functions/package.json
index d459dda2d..28c96e42f 100644
--- a/functions/package.json
+++ b/functions/package.json
@@ -29,6 +29,7 @@
     "luxon": "^2.3.1",
     "nanoid": "^3.3.2",
     "object-hash": "^3.0.0",
+    "openai": "^6.33.0",
     "runtypes": "6.6.0",
     "ssl-root-cas": "^1.3.1",
     "typesense": "^1.2.2",
diff --git a/functions/src/bills/billSummaries.test.ts b/functions/src/bills/billSummaries.test.ts
new file mode 100644
index 000000000..b8a9d344c
--- /dev/null
+++ b/functions/src/bills/billSummaries.test.ts
@@ -0,0 +1,295 @@
+import {
+  normalizeSummary,
+  parseTags,
+  getSummary,
+  getTags,
+  runBillSummaryTrigger
+} from "./billSummaries"
+
+// Stub OpenAI client whose completion call resolves with `responseContent`.
+function makeOpenAIClient(responseContent: string | null) {
+  return {
+    chat: {
+      completions: {
+        create: jest.fn().mockResolvedValue({
+          choices: [{ message: { content: responseContent } }]
+        })
+      }
+    }
+  } as any
+}
+
+// Stub OpenAI client whose completion call always rejects.
+function makeFailingOpenAIClient() {
+  return {
+    chat: {
+      completions: {
+        create: jest.fn().mockRejectedValue(new Error("API error"))
+      }
+    }
+  } as any
+}
+
+// Stub OpenAI client that resolves with each response in turn, one per call.
+function makeSequentialOpenAIClient(...responses: (string | null)[]) {
+  const mock = jest.fn()
+  for (const content of responses) {
+    mock.mockResolvedValueOnce({
+      choices: [{ message: { content } }]
+    })
+  }
+  return {
+    chat: { completions: { create: mock } }
+  } as any
+}
+
+// Minimal DocumentSnapshot stand-in exposing data() and a mocked ref.update.
+function makeSnapshot(
+  data: Record<string, unknown> | undefined
+): FirebaseFirestore.DocumentSnapshot {
+  const ref = { update: jest.fn().mockResolvedValue(undefined) }
+  return {
+    data: () => data,
+    ref
+  } as any
+}
+
+describe("normalizeSummary", () => {
+  it("strips leading Summary: prefix", () => {
+    expect(normalizeSummary("Summary: This is a bill.")).toBe(
+      "This is a bill."
+    )
+  })
+
+  it("collapses bullet list formatting", () => {
+    const input = "Summary:\n- Point one\n- Point two\n- Point three"
+    expect(normalizeSummary(input)).toBe("Point one Point two Point three")
+  })
+
+  it("trims whitespace and removes empty lines", () => {
+    const input = " \n Some text \n\n More text \n "
+    expect(normalizeSummary(input)).toBe("Some text More text")
+  })
+
+  it("handles plain text without prefix", () => {
+    expect(normalizeSummary("Just a summary.")).toBe("Just a summary.")
+  })
+
+  it("strips the prefix and bullets when they are indented", () => {
+    const input = "  Summary:\n  - Point one\n  - Point two"
+    expect(normalizeSummary(input)).toBe("Point one Point two")
+  })
+})
+
+describe("parseTags", () => {
+  it("parses # separated tags and filters to known topics", () => {
+    const response = "Consumer protection # Mental health # Fake topic"
+    const result = parseTags(response)
+    expect(result).toEqual(["Consumer protection", "Mental health"])
+  })
+
+  it("returns empty array for all unknown tags", () => {
+    const result = parseTags("Unknown tag # Another fake")
+    expect(result).toEqual([])
+  })
+
+  it("handles empty string", () => {
+    expect(parseTags("")).toEqual([])
+  })
+
+  it("trims whitespace from tags", () => {
+    const result = parseTags(" Consumer protection # Mental health ")
+    expect(result).toEqual(["Consumer protection", "Mental health"])
+  })
+})
+
+describe("getSummary", () => {
+  it("returns summary on successful API call", async () => {
+    const client = makeOpenAIClient("Summary: This bill does something.")
+    const result = await getSummary(client, "H1234", "Title", "Text")
+    expect(result).toEqual({
+      status: 1,
+      summary: "This bill does something."
+    })
+    expect(client.chat.completions.create).toHaveBeenCalledTimes(1)
+  })
+
+  it("returns status -1 on empty API response", async () => {
+    const client = makeOpenAIClient(null)
+    const result = await getSummary(client, "H1234", "Title", "Text")
+    expect(result).toEqual({ status: -1, summary: "" })
+  })
+
+  it("returns status -1 on API error", async () => {
+    const client = makeFailingOpenAIClient()
+    const result = await getSummary(client, "H1234", "Title", "Text")
+    expect(result).toEqual({ status: -1, summary: "" })
+  })
+
+  it("inserts bill text containing $ patterns and braces verbatim", async () => {
+    const client = makeOpenAIClient("Summary: ok")
+    await getSummary(client, "H1234", "Title", "costs $& and {title}")
+    const request = client.chat.completions.create.mock.calls[0][0]
+    expect(request.messages[0].content).toContain("`costs $& and {title}`")
+  })
+})
+
+describe("getTags", () => {
+  it("returns parsed tags on successful API call", async () => {
+    const client = makeOpenAIClient(
+      "Consumer protection # Mental health # Income tax"
+    )
+    const result = await getTags(client, "H1234", "Title", "A summary")
+    expect(result.status).toBe(1)
+    expect(result.tags).toEqual([
+      "Consumer protection",
+      "Mental health",
+      "Income tax"
+    ])
+  })
+
+  it("filters out unknown tags from response", async () => {
+    const client = makeOpenAIClient(
+      "Consumer protection # Completely made up tag"
+    )
+    const result = await getTags(client, "H1234", "Title", "A summary")
+    expect(result.status).toBe(1)
+    expect(result.tags).toEqual(["Consumer protection"])
+  })
+
+  it("returns status -2 on empty API response", async () => {
+    const client = makeOpenAIClient(null)
+    const result = await getTags(client, "H1234", "Title", "A summary")
+    expect(result).toEqual({ status: -2, tags: [] })
+  })
+
+  it("returns status -2 on API error", async () => {
+    const client = makeFailingOpenAIClient()
+    const result = await getTags(client, "H1234", "Title", "A summary")
+    expect(result).toEqual({ status: -2, tags: [] })
+  })
+})
+
+describe("runBillSummaryTrigger", () => {
+  const billContext = { params: { bill_id: "H1234" } }
+
+  it("returns early when snapshot has no data", async () => {
+    const snapshot = makeSnapshot(undefined)
+    const client = makeOpenAIClient("unused")
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("returns early when content is missing DocumentText", async () => {
+    const snapshot = makeSnapshot({
+      content: { Title: "A title" }
+    })
+    const client = makeOpenAIClient("unused")
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("returns early when content is missing Title", async () => {
+    const snapshot = makeSnapshot({
+      content: { DocumentText: "Some text" }
+    })
+    const client = makeOpenAIClient("unused")
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("does nothing when summary and topics already exist", async () => {
+    const snapshot = makeSnapshot({
+      content: { Title: "Title", DocumentText: "Text" },
+      summary: "Existing summary",
+      topics: [{ category: "Commerce", topic: "Consumer protection" }]
+    })
+    const client = makeOpenAIClient("unused")
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("generates summary and topics when both are missing", async () => {
+    const client = makeSequentialOpenAIClient(
+      "Summary: A new summary",
+      "Consumer protection # Mental health # Income tax"
+    )
+
+    const snapshot = makeSnapshot({
+      content: { Title: "Bill Title", DocumentText: "Bill text content" }
+    })
+
+    await runBillSummaryTrigger(snapshot, billContext, client)
+
+    // Should have called update twice: once for summary, once for topics
+    expect(snapshot.ref.update).toHaveBeenCalledTimes(2)
+    expect(snapshot.ref.update).toHaveBeenCalledWith({
+      summary: "A new summary"
+    })
+    expect(snapshot.ref.update).toHaveBeenCalledWith({
+      topics: expect.arrayContaining([
+        expect.objectContaining({ topic: "Consumer protection" }),
+        expect.objectContaining({ topic: "Mental health" }),
+        expect.objectContaining({ topic: "Income tax" })
+      ])
+    })
+  })
+
+  it("returns early when summary generation fails", async () => {
+    const client = makeFailingOpenAIClient()
+
+    const snapshot = makeSnapshot({
+      content: { Title: "Bill Title", DocumentText: "Bill text content" }
+    })
+
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("generates topics when summary exists but topics are missing", async () => {
+    const client = makeOpenAIClient(
+      "Consumer protection # Mental health"
+    )
+
+    const snapshot = makeSnapshot({
+      content: { Title: "Bill Title", DocumentText: "Bill text content" },
+      summary: "Existing summary"
+    })
+
+    await runBillSummaryTrigger(snapshot, billContext, client)
+
+    // Should only call update once for topics (summary already exists)
+    expect(snapshot.ref.update).toHaveBeenCalledTimes(1)
+    expect(snapshot.ref.update).toHaveBeenCalledWith({
+      topics: expect.arrayContaining([
+        expect.objectContaining({ topic: "Consumer protection" }),
+        expect.objectContaining({ topic: "Mental health" })
+      ])
+    })
+  })
+
+  it("does not write topics when tag generation fails", async () => {
+    const client = makeFailingOpenAIClient()
+
+    const snapshot = makeSnapshot({
+      content: { Title: "Bill Title", DocumentText: "Bill text content" },
+      summary: "Existing summary"
+    })
+
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+
+  it("skips tag generation when summary exists but Title is missing", async () => {
+    const client = makeOpenAIClient("Consumer protection # Mental health")
+
+    const snapshot = makeSnapshot({
+      content: { DocumentText: "Bill text content" },
+      summary: "Existing summary"
+    })
+
+    await runBillSummaryTrigger(snapshot, billContext, client)
+    expect(client.chat.completions.create).not.toHaveBeenCalled()
+    expect(snapshot.ref.update).not.toHaveBeenCalled()
+  })
+})
diff --git a/functions/src/bills/billSummaries.ts b/functions/src/bills/billSummaries.ts
new file mode 100644
index 000000000..1cfe3402f
--- /dev/null
+++ b/functions/src/bills/billSummaries.ts
@@ -0,0 +1,304 @@
+import { runWith } from "firebase-functions"
+import OpenAI from "openai"
+import { assignCategoriesToTopics } from "./topicParser"
+import { CATEGORIES_BY_TOPIC } from "./types"
+
+// Chat completion model used for both summarization and tagging.
+const MODEL = "gpt-4o-mini"
+
+const SUMMARIZATION_PROMPT_PREFIX = `Can you please explain what the following MA bill means to a regular resident without specialized knowledge?
+  Please provide a one paragraph summary in a maximum of 4 sentences. Please be simple, direct and concise for the busy reader.
+  Please use politically neutral language, keeping in mind that readers may have various ideological perspectives.
+  Make bullet points if possible.
+
+  Note that the bill refers to specific existing chapters and sections of the Mass General Laws (MGL). Use the corresponding names of Mass General Law chapter and sections for constructing your summary.`
+
+const SUMMARIZATION_INSTRUCTIONS = `
+  INSTRUCTIONS:
+
+  1. Only provide Summary, no other details are required.
+  2. Do not provide tags or other extraneous text besides the summary.
+  3. Do not cite the title of the bill - the reader will already know that
+  4. Do not cite specific section, chapter or title numbers of the MGL - the reader will not know what those sections are.
+  5. Do not reference that this is a "MA" or "Massachusetts" bill - the reader will already know that.
+  6. If referencing dates or other provisions of the bill, say that "this would happen if the bill is passed" rather than "this will happen".
+
+  RESPONSE FORMAT:
+
+  Summary:`
+
+const SUMMARIZATION_PROMPT = `
+  ${SUMMARIZATION_PROMPT_PREFIX}
+
+  The bill title is: \`{title}\`
+
+  The bill text is: \`{context}\`
+
+  The relevant section names are: \`None\`
+
+  The relevant section text is: \`None\`
+
+  The relevant committee information if available: \`None\`
+
+  ${SUMMARIZATION_INSTRUCTIONS}`
+
+const TAGGING_INSTRUCTIONS = `
+  INSTRUCTIONS:
+  1. Choose minimum of 3 tags and no more than 5.
+  2. Do not provide explanations for the tag choices.
+  3. Do not output tags not listed above.
+  4. Do not modify or paraphrase the tag names, choose directly from the list provided.
+  5. Do not assign tags only for the sake of tagging; tag them only if they are relevant.
+  6. Please apply a higher threshold of relevancy to assigning tags. We want to ensure all the tags are relevant.
+  7. Respond with # separated tags.
+
+  Tags: `
+
+const TAGGING_PROMPT_USING_SUMMARIES = `
+  Your Job here is to identify the tags that can be associated to the following MA Legislative bill.
+  Choose the closest relevant tags and do not output tags outside of the provided tags.
+  Please be politically neutral, keeping in mind that readers may have various ideological perspectives.
+  Below is the summary of the bill.
+
+  The bill title is: {bill_title}
+
+  The bill summary is: {context}
+
+  List of tags:
+  - {tags}
+
+  ${TAGGING_INSTRUCTIONS}`
+
+// Topic names the model may return; anything else is discarded by parseTags.
+const KNOWN_TOPICS = new Set(Object.keys(CATEGORIES_BY_TOPIC))
+
+/**
+ * Flattens a model response into a single line: drops a leading `Summary:`
+ * label, strips `- ` bullet markers, trims each line, removes empty lines,
+ * and joins what is left with single spaces. Leading indentation is ignored.
+ */
+export function normalizeSummary(summary: string): string {
+  const stripped = summary.trimStart().replace(/^Summary:/, "")
+  const lines = stripped.split("\n")
+  const cleaned = lines
+    .map(line => line.trim().replace(/^- /, "").trim())
+    .filter(line => line !== "")
+  return cleaned.join(" ")
+}
+
+/**
+ * Splits a `#`-separated model response into trimmed tags, keeping only
+ * names that are keys of `CATEGORIES_BY_TOPIC`.
+ */
+export function parseTags(response: string): string[] {
+  return response
+    .split("#")
+    .map(t => t.trim())
+    .filter(t => t !== "" && KNOWN_TOPICS.has(t))
+}
+
+/**
+ * Substitutes `{name}` placeholders in `template` with the matching entry of
+ * `vars`; placeholders without an entry are left untouched.
+ *
+ * Runs as a single pass with a function replacer so that inserted values are
+ * taken literally: `$&`-style replacement patterns in bill text are not
+ * expanded, and placeholder-looking text inside a value is not substituted.
+ */
+function formatPrompt(
+  template: string,
+  vars: Record<string, string>
+): string {
+  return template.replace(/\{(\w+)\}/g, (placeholder: string, key: string) =>
+    Object.prototype.hasOwnProperty.call(vars, key) ? vars[key] : placeholder
+  )
+}
+
+/**
+ * Asks the model for a plain-language summary of a bill.
+ *
+ * @returns `{ status: 1, summary }` on success; `{ status: -1, summary: "" }`
+ * when the API call throws or the response has no content. Failures are
+ * logged, never thrown.
+ */
+export async function getSummary(
+  client: OpenAI,
+  billId: string,
+  title: string,
+  text: string
+): Promise<{ status: number; summary: string }> {
+  const prompt = formatPrompt(SUMMARIZATION_PROMPT, {
+    title,
+    context: text
+  })
+
+  try {
+    const response = await client.chat.completions.create({
+      model: MODEL,
+      messages: [{ role: "user", content: prompt }]
+    })
+
+    const content = response.choices[0]?.message?.content
+    if (!content) {
+      console.error(
+        `failed to generate summary for bill with id \`${billId}\`: empty response`
+      )
+      return { status: -1, summary: "" }
+    }
+
+    return { status: 1, summary: normalizeSummary(content) }
+  } catch (error) {
+    console.error(
+      `failed to generate summary for bill with id \`${billId}\`:`,
+      error
+    )
+    return { status: -1, summary: "" }
+  }
+}
+
+/**
+ * Asks the model to pick topic tags for a bill from its title and summary.
+ *
+ * @returns `{ status: 1, tags }` on success, where `tags` contains only known
+ * topics (possibly none); `{ status: -2, tags: [] }` when the API call throws
+ * or the response has no content. Failures are logged, never thrown.
+ */
+export async function getTags(
+  client: OpenAI,
+  billId: string,
+  title: string,
+  summary: string
+): Promise<{ status: number; tags: string[] }> {
+  const allTopics = Object.keys(CATEGORIES_BY_TOPIC).join("\n- ")
+  const prompt = formatPrompt(TAGGING_PROMPT_USING_SUMMARIES, {
+    bill_title: title,
+    context: summary,
+    tags: allTopics
+  })
+
+  try {
+    const response = await client.chat.completions.create({
+      model: MODEL,
+      messages: [{ role: "user", content: prompt }]
+    })
+
+    const content = response.choices[0]?.message?.content
+    if (!content) {
+      console.error(
+        `failed to generate tags for bill with id \`${billId}\`: empty response`
+      )
+      return { status: -2, tags: [] }
+    }
+
+    const tags = parseTags(content)
+    return { status: 1, tags }
+  } catch (error) {
+    console.error(
+      `failed to generate tags for bill with id \`${billId}\`:`,
+      error
+    )
+    return { status: -2, tags: [] }
+  }
+}
+
+/**
+ * Fills in the `summary` and `topics` fields of a bill document.
+ *
+ * A summary is generated only when the document has none, and topics only
+ * when `topics` is unset. Any missing input or model failure is logged and
+ * ends the run without writing the corresponding field.
+ *
+ * @param clientOverride OpenAI client to use instead of one built from
+ * `OPENAI_API_KEY`; the tests pass a stub here.
+ */
+export async function runBillSummaryTrigger(
+  snapshot: FirebaseFirestore.DocumentSnapshot,
+  context: { params: { bill_id: string } },
+  clientOverride?: OpenAI
+): Promise<void> {
+  const billId = context.params.bill_id
+  const data = snapshot.data()
+
+  if (!data) {
+    console.log(`bill with id \`${billId}\` has no event data`)
+    return
+  }
+
+  const client =
+    clientOverride ??
+    new OpenAI({
+      apiKey: process.env.OPENAI_API_KEY
+    })
+
+  let summary: string | undefined = data.summary
+  const documentTitle: string | undefined = data.content?.Title
+  const documentText: string | undefined = data.content?.DocumentText
+
+  // If the summary is not already populated, generate it
+  if (!summary) {
+    if (!documentText || !documentTitle) {
+      console.log(
+        `bill with id \`${billId}\` unable to fetch document text or title`
+      )
+      return
+    }
+
+    const summaryResult = await getSummary(
+      client,
+      billId,
+      documentTitle,
+      documentText
+    )
+
+    if (summaryResult.status !== 1) {
+      console.log(
+        `failed to generate summary for bill with id \`${billId}\`, got ${summaryResult.status}`
+      )
+      return
+    }
+
+    summary = summaryResult.summary
+    await snapshot.ref.update({ summary })
+    console.log(`Successfully updated summary for bill with id \`${billId}\``)
+  }
+
+  // If the topics are already populated, we are done
+  if (data.topics) {
+    console.log(`bill with id \`${billId}\` has topics`)
+    return
+  }
+
+  // The title is only validated above when a summary had to be generated, so
+  // a bill with an existing summary can still reach this point without one.
+  if (!documentTitle) {
+    console.log(
+      `bill with id \`${billId}\` has no title, skipping tag generation`
+    )
+    return
+  }
+
+  const tagsResult = await getTags(client, billId, documentTitle, summary)
+
+  if (tagsResult.status !== 1) {
+    console.log(
+      `failed to generate tags for bill with id \`${billId}\`, got ${tagsResult.status}`
+    )
+    return
+  }
+
+  const topics = assignCategoriesToTopics(tagsResult.tags)
+  await snapshot.ref.update({ topics })
+  console.log(`Successfully updated topics for bill with id \`${billId}\``)
+}
+
+// Firestore trigger: summarize and tag each bill document when it is created.
+export const generateBillSummary = runWith({
+  secrets: ["OPENAI_API_KEY"],
+  timeoutSeconds: 120,
+  memory: "512MB"
+})
+  .firestore.document("generalCourts/{sessionId}/bills/{bill_id}")
+  .onCreate(async (snapshot, context) => {
+    await runBillSummaryTrigger(snapshot, context)
+  })
diff --git a/functions/src/bills/index.ts b/functions/src/bills/index.ts
index 6ae961853..8f276fb6a 100644
--- a/functions/src/bills/index.ts
+++ b/functions/src/bills/index.ts
@@ -1,4 +1,5 @@
 export * from "./backfillTestimonyCounts"
+export * from "./billSummaries"
 export * from "./bills"
 export * from "./search"
 export * from "./updateBillReferences"
diff --git a/functions/src/index.ts b/functions/src/index.ts
index b396108d9..e82aa0236 100644
--- a/functions/src/index.ts
+++ b/functions/src/index.ts
@@ -2,6 +2,7 @@ export { modifyAccount, createFakeOrg, createFakeTestimony } from "./auth"
 export {
   backfillTestimonyCounts,
   fetchBillBatch,
+  generateBillSummary,
   startBillBatches,
   syncBillToSearchIndex,
   updateBillReferences,
diff --git a/functions/yarn.lock b/functions/yarn.lock
index 6289a53f7..59ae9f1f7 100644
--- a/functions/yarn.lock
+++ b/functions/yarn.lock
@@ -5783,6 +5783,11 @@ open@^6.3.0:
   dependencies:
     is-wsl "^1.1.0"
 
+openai@^6.33.0:
+  version "6.33.0"
+  resolved "https://registry.yarnpkg.com/openai/-/openai-6.33.0.tgz#d09691a91df327d66d76f39f9b590aa55dd699dd"
+  integrity sha512-xAYN1W3YsDXJWA5F277135YfkEk6H7D3D6vWwRhJ3OEkzRgcyK8z/P5P9Gyi/wB4N8kK9kM5ZjprfvyHagKmpw==
+
 openapi3-ts@^3.1.1:
   version "3.2.0"
   resolved "https://registry.npmjs.org/openapi3-ts/-/openapi3-ts-3.2.0.tgz"