Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions backend/.alembic/versions/b2c3d4e5f6a7_congressional_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Add Congressional Record tables

Revision ID: b2c3d4e5f6a7
Revises: a1b2c3d4e5f6
Create Date: 2026-03-24 00:00:00.000000

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ENUM

# Revision identifiers, used by Alembic to order this migration in the chain.
revision: str = 'b2c3d4e5f6a7'
# Parent revision this migration is applied on top of.
down_revision: Union[str, None] = 'a1b2c3d4e5f6'
# No branch labels or cross-branch dependencies are declared for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create the Congressional Record (CREC) enum type, tables and indexes.

    Tables are created parent-first so that every foreign key points at a
    table that already exists: ``crec_issue`` -> ``crec_granule`` ->
    ``crec_speech`` -> ``crec_bill_reference``, and finally
    ``prompts.crec_summary``.
    """

    def pk(column_name):
        """Return a fresh auto-incrementing integer primary-key column."""
        return sa.Column(column_name, sa.Integer(), primary_key=True, autoincrement=True)

    def int_fk(column_name, target, on_delete):
        """Return a fresh nullable integer column with a foreign key to *target*."""
        return sa.Column(
            column_name,
            sa.Integer(),
            sa.ForeignKey(target, ondelete=on_delete),
            nullable=True,
        )

    def text(column_name):
        """Return a fresh nullable, unbounded string column."""
        return sa.Column(column_name, sa.String(), nullable=True)

    def counter(column_name):
        """Return a fresh integer column declared with ``default=0``.

        NOTE(review): ``default=`` is a client-side (Python) default and is not
        rendered into the CREATE TABLE statement; a database-level default
        would need ``server_default`` in a follow-up migration.
        """
        return sa.Column(column_name, sa.Integer(), default=0)

    def created_at():
        """Return a fresh timestamp column defaulting to the database's now()."""
        return sa.Column('created_at', sa.DateTime(), server_default=sa.func.now())

    # The enum type is created through a guarded DO block so that re-running
    # against a database which already has `crecsection` does not fail.
    ensure_enum_sql = "DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'crecsection') THEN CREATE TYPE crecsection AS ENUM ('Senate', 'House', 'Extensions', 'DailyDigest'); END IF; END $$;"
    op.execute(ensure_enum_sql)

    # create_type=False: reference the enum created above instead of letting
    # SQLAlchemy emit its own CREATE TYPE alongside the table.
    crec_section_ref = ENUM(
        'Senate',
        'House',
        'Extensions',
        'DailyDigest',
        name='crecsection',
        create_type=False,
    )

    # --- crec_issue -------------------------------------------------------
    op.create_table(
        'crec_issue',
        pk('crec_issue_id'),
        sa.Column('issue_date', sa.Date(), nullable=True),
        int_fk('congress_id', 'congress.congress_id', 'CASCADE'),
        text('package_id'),
        created_at(),
    )
    op.create_index('ix_crec_issue_issue_date', 'crec_issue', ['issue_date'], unique=True)
    op.create_index('ix_crec_issue_congress_id', 'crec_issue', ['congress_id'])
    op.create_unique_constraint('uq_crec_issue_package_id', 'crec_issue', ['package_id'])

    # --- crec_granule -----------------------------------------------------
    op.create_table(
        'crec_granule',
        pk('crec_granule_id'),
        int_fk('crec_issue_id', 'crec_issue.crec_issue_id', 'CASCADE'),
        text('granule_id'),
        sa.Column('section', crec_section_ref, nullable=True),
        text('title'),
        text('page_start'),
        text('page_end'),
        counter('order_number'),
        created_at(),
    )
    op.create_index('ix_crec_granule_crec_issue_id', 'crec_granule', ['crec_issue_id'])
    op.create_index('ix_crec_granule_section', 'crec_granule', ['section'])
    op.create_unique_constraint('uq_crec_granule_granule_id', 'crec_granule', ['granule_id'])

    # --- crec_speech ------------------------------------------------------
    op.create_table(
        'crec_speech',
        pk('crec_speech_id'),
        int_fk('crec_granule_id', 'crec_granule.crec_granule_id', 'CASCADE'),
        text('speaker_raw'),
        # String-typed FK: the speaker link survives as NULL if the
        # legislator row is removed.
        sa.Column(
            'legislator_bioguide_id',
            sa.String(),
            sa.ForeignKey('legislator.bioguide_id', ondelete='SET NULL'),
            nullable=True,
        ),
        counter('order_number'),
        text('content_text'),
        counter('word_count'),
        created_at(),
    )
    op.create_index('ix_crec_speech_crec_granule_id', 'crec_speech', ['crec_granule_id'])
    op.create_index('ix_crec_speech_legislator_bioguide_id', 'crec_speech', ['legislator_bioguide_id'])

    # --- crec_bill_reference ----------------------------------------------
    op.create_table(
        'crec_bill_reference',
        pk('crec_bill_reference_id'),
        int_fk('crec_speech_id', 'crec_speech.crec_speech_id', 'CASCADE'),
        int_fk('legislation_id', 'legislation.legislation_id', 'SET NULL'),
        text('cite_text'),
        text('cite_type'),
        sa.Column('start_offset', sa.Integer(), nullable=True),
        sa.Column('end_offset', sa.Integer(), nullable=True),
    )
    op.create_index('ix_crec_bill_reference_crec_speech_id', 'crec_bill_reference', ['crec_speech_id'])
    op.create_index('ix_crec_bill_reference_legislation_id', 'crec_bill_reference', ['legislation_id'])

    # --- prompts.crec_summary ---------------------------------------------
    op.create_table(
        'crec_summary',
        pk('crec_summary_id'),
        int_fk('crec_granule_id', 'crec_granule.crec_granule_id', 'CASCADE'),
        int_fk('crec_issue_id', 'crec_issue.crec_issue_id', 'CASCADE'),
        text('summary'),
        text('summary_type'),
        int_fk('prompt_batch_id', 'prompts.prompt_batch.prompt_batch_id', 'CASCADE'),
        created_at(),
        schema='prompts',
    )
    op.create_index('ix_crec_summary_crec_granule_id', 'crec_summary', ['crec_granule_id'], schema='prompts')
    op.create_index('ix_crec_summary_crec_issue_id', 'crec_summary', ['crec_issue_id'], schema='prompts')
    op.create_index('ix_crec_summary_prompt_batch_id', 'crec_summary', ['prompt_batch_id'], schema='prompts')


def downgrade() -> None:
    """Drop the CREC tables, then the ``crecsection`` enum type.

    Tables are dropped child-first (the reverse of the creation order) so no
    foreign key is left pointing at a missing table. Indexes and unique
    constraints are removed together with their tables.
    """
    # (table name, schema) pairs; None means the default schema.
    drop_order = (
        ('crec_summary', 'prompts'),
        ('crec_bill_reference', None),
        ('crec_speech', None),
        ('crec_granule', None),
        ('crec_issue', None),
    )
    for table_name, schema_name in drop_order:
        op.drop_table(table_name, schema=schema_name)

    # The enum can only go once no column uses it; checkfirst makes the drop
    # a no-op when the type is already absent.
    section_enum = sa.Enum(name='crecsection')
    section_enum.drop(op.get_bind(), checkfirst=True)
138 changes: 138 additions & 0 deletions backend/congress_db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ def bind_expression(self, bindvalue):
return sa.cast(bindvalue, self)


@enum.unique
class CRECSection(str, enum.Enum):
    """Section labels used for Congressional Record granules.

    Members are also ``str`` instances, so they compare equal to their plain
    string values.
    """

    Senate = "Senate"
    House = "House"
    Extensions = "Extensions"
    DailyDigest = "DailyDigest"


class LegislatorJob(str, enum.Enum):
Senator = "Senator"
Representative = "Representative"
Expand Down Expand Up @@ -1262,6 +1269,137 @@ class Appropriation(AppropriationsBase):
purpose = Column(String, default="")


class CRECIssue(Base):
    """
    One row per daily Congressional Record issue
    """

    __tablename__ = "crec_issue"

    crec_issue_id = Column(Integer, primary_key=True)
    # Unique index: at most one issue row per date.
    issue_date = Column(Date, unique=True, index=True)
    # Issues are removed by the database when their congress row is deleted.
    congress_id = Column(
        Integer, ForeignKey("congress.congress_id", ondelete="CASCADE"), index=True
    )
    # Unique package identifier for the issue.
    package_id = Column(String, unique=True)
    # Naive timestamp filled in by the database at insert time.
    created_at = Column(DateTime(timezone=False), server_default=func.now())

    # crec_granule.crec_issue_id is declared ON DELETE CASCADE. Without a
    # delete cascade here, an ORM-level delete of an issue would first set the
    # FK of its granules to NULL, orphaning them instead of removing them.
    # `delete` handles granules already loaded in the session;
    # `passive_deletes=True` leaves unloaded rows to the database cascade.
    granules = relationship(
        "CRECGranule",
        back_populates="issue",
        cascade="save-update, merge, delete",
        passive_deletes=True,
    )


class CRECGranule(Base):
    """
    A discrete item/debate/segment within a daily Congressional Record issue
    """

    __tablename__ = "crec_granule"

    crec_granule_id = Column(Integer, primary_key=True)
    # Owning issue; granule rows are removed by the database with the issue.
    crec_issue_id = Column(
        Integer,
        ForeignKey("crec_issue.crec_issue_id", ondelete="CASCADE"),
        index=True,
    )
    # Unique granule identifier (distinct from the surrogate primary key).
    granule_id = Column(String, unique=True)
    section = Column(Enum(CRECSection), index=True)
    title = Column(String)
    # Page markers are stored as strings, not integers.
    page_start = Column(String, nullable=True)
    page_end = Column(String, nullable=True)
    # Client-side default of 0; presumably the position within the issue —
    # TODO confirm against the loader.
    order_number = Column(Integer, default=0)
    # Naive timestamp filled in by the database at insert time.
    created_at = Column(DateTime(timezone=False), server_default=func.now())

    issue = relationship("CRECIssue", back_populates="granules")
    # crec_speech.crec_granule_id is declared ON DELETE CASCADE. Without a
    # delete cascade here, an ORM-level delete of a granule would set the FK
    # of its speeches to NULL and orphan them. `delete` handles speeches
    # already loaded in the session; `passive_deletes=True` leaves unloaded
    # rows to the database cascade.
    speeches = relationship(
        "CRECSpeech",
        back_populates="granule",
        cascade="save-update, merge, delete",
        passive_deletes=True,
    )


class CRECSpeech(Base):
    """
    Individual speech segment within a granule, with speaker attribution
    """

    __tablename__ = "crec_speech"

    crec_speech_id = Column(Integer, primary_key=True)
    # Owning granule; speech rows are removed by the database with the granule.
    crec_granule_id = Column(
        Integer,
        ForeignKey("crec_granule.crec_granule_id", ondelete="CASCADE"),
        index=True,
    )
    # Speaker text as stored, kept alongside the optional resolved legislator.
    speaker_raw = Column(String, nullable=True)
    # Optional link to a legislator; reset to NULL if that legislator is deleted.
    legislator_bioguide_id = Column(
        String,
        ForeignKey("legislator.bioguide_id", ondelete="SET NULL"),
        index=True,
        nullable=True,
    )
    # Client-side default of 0; presumably the position within the granule —
    # TODO confirm against the loader.
    order_number = Column(Integer, default=0)
    content_text = Column(String)
    # Client-side default of 0.
    word_count = Column(Integer, default=0)
    # Naive timestamp filled in by the database at insert time.
    created_at = Column(DateTime(timezone=False), server_default=func.now())

    granule = relationship("CRECGranule", back_populates="speeches")
    # crec_bill_reference.crec_speech_id is declared ON DELETE CASCADE.
    # Without a delete cascade here, an ORM-level delete of a speech would set
    # the FK of its bill references to NULL and orphan them. `delete` handles
    # references already loaded in the session; `passive_deletes=True` leaves
    # unloaded rows to the database cascade.
    bill_references = relationship(
        "CRECBillReference",
        back_populates="speech",
        cascade="save-update, merge, delete",
        passive_deletes=True,
    )


class CRECBillReference(Base):
    """
    A bill citation located inside the text of a Congressional Record speech.

    Each row belongs to one speech and may optionally be linked to a
    legislation row.
    """

    __tablename__ = "crec_bill_reference"

    crec_bill_reference_id = Column(Integer, primary_key=True)

    # Owning speech; reference rows are removed by the database with the speech.
    crec_speech_id = Column(
        Integer, ForeignKey("crec_speech.crec_speech_id", ondelete="CASCADE"), index=True
    )

    # Optional link to legislation; reset to NULL if that legislation is deleted.
    legislation_id = Column(
        Integer,
        ForeignKey("legislation.legislation_id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )

    # The citation as text plus a free-form type label.
    cite_text = Column(String)
    cite_type = Column(String)

    # Presumably offsets of the citation within the speech text — TODO confirm
    # the unit and whether end_offset is inclusive.
    start_offset = Column(Integer)
    end_offset = Column(Integer)

    speech = relationship("CRECSpeech", back_populates="bill_references")


class CRECSummary(PromptsBase):
    """
    An LLM-generated summary attached to Congressional Record content.

    Both ``crec_granule_id`` and ``crec_issue_id`` are nullable, so a summary
    row can reference a granule, an issue, both, or neither; nothing at the
    schema level enforces which.
    """

    __tablename__ = "crec_summary"
    __table_args__ = {"schema": "prompts"}

    crec_summary_id = Column(Integer, primary_key=True)

    # NOTE(review): these two FKs name their targets by string while this
    # model is declared on PromptsBase; confirm the string resolves if
    # PromptsBase uses separate metadata from the crec_* tables.
    crec_granule_id = Column(
        Integer,
        ForeignKey("crec_granule.crec_granule_id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )
    crec_issue_id = Column(
        Integer,
        ForeignKey("crec_issue.crec_issue_id", ondelete="CASCADE"),
        nullable=True,
        index=True,
    )

    # Summary text and a free-form label for the kind of summary.
    summary = Column(String)
    summary_type = Column(String)

    # Optional prompt batch link; the summary is removed by the database when
    # that batch is deleted.
    prompt_batch_id = Column(
        Integer,
        ForeignKey(PromptBatch.prompt_batch_id, ondelete="CASCADE"),
        nullable=True,
        index=True,
    )

    # Naive timestamp filled in by the database at insert time.
    created_at = Column(DateTime(timezone=False), server_default=func.now())


class User(AuthenticationBase):
__tablename__ = "user"

Expand Down
2 changes: 2 additions & 0 deletions backend/congress_fastapi/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from congress_fastapi.routes.stats import router as stats_router
from congress_fastapi.routes.uscode import router as uscode_router
from congress_fastapi.routes.committees import router as committees_router
from congress_fastapi.routes.congressional_record import router as crec_router
from congress_fastapi.utils.limiter import limiter
from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
Expand Down Expand Up @@ -56,4 +57,5 @@ async def log_exceptions_middleware(request: Request, call_next):
# Register the feature routers on the application. crec_router is the router
# imported from congress_fastapi.routes.congressional_record.
app.include_router(stats_router)
app.include_router(uscode_router)
app.include_router(committees_router)
app.include_router(crec_router)
# Printed once all routers above have been registered.
print("Loaded")
Loading
Loading