Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 71 additions & 14 deletions backend/src/cms_backend/db/books.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from uuid import UUID

from pydantic import AnyUrl
from sqlalchemy import String, and_, or_, select
from sqlalchemy import String, and_, func, or_, select
from sqlalchemy.orm import Session as OrmSession

from cms_backend.context import Context
Expand Down Expand Up @@ -145,6 +145,23 @@ def get_zim_urls_prod(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSchema
"""
Get view and download URLs for a list of ZIM IDs (Book IDs).
"""
# Find the latest date for each title/flavour combination. The book IDs
# passed in might not include the latest book, and only the latest book
# per title/flavour combination should get a view URL.
latest_dates_subq = (
select(Book.title_id, Book.flavour, func.max(Book.date).label("max_date"))
.where(
Book.needs_processing.is_(False),
Book.has_error.is_(False),
Book.needs_file_operation.is_(False),
Book.location_kind == "prod",
Book.title_id.in_(select(Book.title_id).where(Book.id.in_(zim_ids))),
)
.group_by(Book.title_id, Book.flavour)
.subquery()
)

stmt = (
select(
Book.id.label("book_id"),
Expand All @@ -156,10 +173,18 @@ def get_zim_urls_prod(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSchema
Collection.view_base_url,
CollectionTitle.path.label("subpath"),
BookLocation.filename,
(Book.date == latest_dates_subq.c.max_date).label("is_latest"),
)
.join(Title, Book.title_id == Title.id)
.join(CollectionTitle, CollectionTitle.title_id == Title.id)
.join(Collection, Collection.id == CollectionTitle.collection_id)
.join(
latest_dates_subq,
and_(
Book.title_id == latest_dates_subq.c.title_id,
Book.flavour == latest_dates_subq.c.flavour,
),
)
.join(
BookLocation,
and_(
Expand All @@ -178,7 +203,7 @@ def get_zim_urls_prod(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSchema
Book.location_kind == "prod",
)
)
.order_by(Title.id, Book.flavour, Book.created_at.desc())
.order_by(Title.id, Book.flavour, Book.date.desc(), Book.created_at.desc())
)

result = ZimUrlsSchema(urls={zim_id: [] for zim_id in zim_ids})
Expand All @@ -200,7 +225,7 @@ def get_zim_urls_prod(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSchema
)
)

if row.view_base_url:
if row.view_base_url and row.is_latest:
key = (row.title_id, row.book_flavour)
if key not in seen:
seen.add(key)
Expand All @@ -222,15 +247,40 @@ def get_zim_urls_staging(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSch
"""
Get view and download URLs for a list of ZIM IDs (Book IDs).
"""
# Find the latest date for each title/flavour combination. The book IDs
# passed in might not include the latest book, and only the latest book
# per title/flavour combination should get a view URL.
latest_dates_subq = (
select(Book.title_id, Book.flavour, func.max(Book.date).label("max_date"))
.where(
Book.needs_processing.is_(False),
Book.has_error.is_(False),
Book.needs_file_operation.is_(False),
Book.location_kind == "staging",
Book.title_id.in_(select(Book.title_id).where(Book.id.in_(zim_ids))),
)
.group_by(Book.title_id, Book.flavour)
.subquery()
)

stmt = (
select(
Book.id.label("book_id"),
Book.location_kind.label("book_location_kind"),
Title.id.label("title_id"),
Book.flavour.label("book_flavour"),
BookLocation.filename,
(Book.date == latest_dates_subq.c.max_date).label("is_latest"),
)
.join(Title, Book.title_id == Title.id)
.join(
latest_dates_subq,
and_(
Book.title_id == latest_dates_subq.c.title_id,
Book.flavour == latest_dates_subq.c.flavour,
),
)
.join(
BookLocation,
and_(
Expand All @@ -249,10 +299,11 @@ def get_zim_urls_staging(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSch
Book.location_kind == "staging",
)
)
.order_by(Title.id, Book.flavour, Book.created_at.desc())
.order_by(Title.id, Book.flavour, Book.date.desc(), Book.created_at.desc())
)

result = ZimUrlsSchema(urls={zim_id: [] for zim_id in zim_ids})
seen: set[tuple[str | None, str | None]] = set()

for row in session.execute(stmt).all():
result.urls[row.book_id].append(
Expand All @@ -271,16 +322,22 @@ def get_zim_urls_staging(session: OrmSession, zim_ids: list[UUID]) -> ZimUrlsSch
)
)

filename_without_suffix = (
row.filename[:-4] if row.filename.endswith(".zim") else row.filename
)
result.urls[row.book_id].append(
ZimUrlSchema(
kind="view",
url=AnyUrl(f"{Context.staging_view_base_url}{filename_without_suffix}"),
collection="staging",
)
)
if row.is_latest:
key = (row.title_id, row.book_flavour)
if key not in seen:
seen.add(key)
filename_without_suffix = (
row.filename[:-4] if row.filename.endswith(".zim") else row.filename
)
result.urls[row.book_id].append(
ZimUrlSchema(
kind="view",
url=AnyUrl(
f"{Context.staging_view_base_url}{filename_without_suffix}"
),
collection="staging",
)
)

return result

Expand Down
143 changes: 140 additions & 3 deletions backend/tests/db/test_books.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,11 @@ def test_get_zim_urls(
collection = create_collection(warehouse=warehouse)
create_collection_title(title=title, collection=collection, path=Path(""))

book = create_book(zim_metadata={"Name": title.name})
book = create_book(
zim_metadata={"Name": title.name},
flavour="all",
date="2023-01-01",
)
book.title = title
book.location_kind = "prod"
title.books.append(book)
Expand Down Expand Up @@ -421,7 +425,11 @@ def test_get_zim_urls_book_with_subpath(
subpath = Path("wikipedia")
create_collection_title(title=title, collection=collection, path=subpath)

book = create_book(zim_metadata={"Name": title.name})
book = create_book(
zim_metadata={"Name": title.name},
flavour="all",
date="2023-01-01",
)
book.title = title
book.location_kind = "prod"
title.books.append(book)
Expand Down Expand Up @@ -474,7 +482,11 @@ def test_get_zim_urls_book_in_staging(
subpath = Path("wikipedia")
create_collection_title(title=title, collection=collection, path=subpath)

book = create_book(zim_metadata={"Name": title.name})
book = create_book(
zim_metadata={"Name": title.name},
flavour="all",
date="2023-01-01",
)
book.title = title
book.location_kind = "staging"
title.books.append(book)
Expand Down Expand Up @@ -674,6 +686,7 @@ def test_get_zim_urls_single_view_link_for_multiple_books_with_same_title_flavou
zim_metadata={"Name": title.name},
created_at=now - datetime.timedelta(days=7),
flavour="test",
date="2023-01-02",
)
book1.title = title
book1.location_kind = "prod"
Expand All @@ -691,6 +704,7 @@ def test_get_zim_urls_single_view_link_for_multiple_books_with_same_title_flavou
zim_metadata={"Name": title.name},
created_at=now - datetime.timedelta(days=14),
flavour="test",
date="2023-01-01",
)
book2.title = title
book2.location_kind = "prod"
Expand Down Expand Up @@ -720,6 +734,129 @@ def test_get_zim_urls_single_view_link_for_multiple_books_with_same_title_flavou
assert book2_view_url is None


def test_get_zim_urls_tie_breaker_same_date(
    dbsession: OrmSession,
    create_book: Callable[..., Book],
    create_title: Callable[..., Title],
    create_warehouse: Callable[..., Warehouse],
    create_collection: Callable[..., Collection],
    create_collection_title: Callable[..., CollectionTitle],
    create_book_location: Callable[..., BookLocation],
):
    """Check the view-URL tie-break between books sharing a date.

    Two "prod" books with the same title, flavour and date, but different
    ``created_at`` values, are both passed to ``get_zim_urls``. The test
    expects the more recently created book to carry two URLs including a
    view URL, and the other book to carry a single URL that is not a view
    URL.
    """
    warehouse = create_warehouse()
    title = create_title(name="test_en_all")
    collection = create_collection(warehouse=warehouse)
    create_collection_title(title=title, collection=collection, path=Path(""))
    now = getnow()

    def make_prod_book(
        flavour: str, date: str, age_in_days: int, zim_filename: str
    ) -> Book:
        # Create a "prod" book attached to the shared title, together with a
        # "current" location in the shared warehouse.
        created = create_book(
            zim_metadata={"Name": title.name},
            created_at=now - datetime.timedelta(days=age_in_days),
            flavour=flavour,
            date=date,
        )
        created.title = title
        created.location_kind = "prod"
        title.books.append(created)
        create_book_location(
            book=created,
            warehouse_id=warehouse.id,
            path=Path(""),
            filename=zim_filename,
            status="current",
        )
        return created

    # Same flavour and date for both books; only created_at differs.
    newer_created = make_prod_book("maxi", "2023-01-01", 7, "test_test_newer.zim")
    older_created = make_prod_book("maxi", "2023-01-01", 14, "test_test_older.zim")

    dbsession.flush()

    result = get_zim_urls(dbsession, zim_ids=[newer_created.id, older_created.id])

    assert newer_created.id in result.urls
    assert older_created.id in result.urls

    # The more recently created book is expected to get download and view.
    newer_urls = result.urls[newer_created.id]
    assert len(newer_urls) == 2
    assert any(url.kind == "view" for url in newer_urls)

    # The earlier-created book is expected to get the download only.
    older_urls = result.urls[older_created.id]
    assert len(older_urls) == 1
    assert not any(url.kind == "view" for url in older_urls)


def test_get_zim_urls_no_view_link_if_latest_excluded(
    dbsession: OrmSession,
    create_book: Callable[..., Book],
    create_title: Callable[..., Title],
    create_warehouse: Callable[..., Warehouse],
    create_collection: Callable[..., Collection],
    create_collection_title: Callable[..., CollectionTitle],
    create_book_location: Callable[..., BookLocation],
):
    """Check that older books get no view URL when the latest is not queried.

    For each of two flavours, a newer-dated and an older-dated "prod" book
    are created under the same title. Only the older books are passed to
    ``get_zim_urls``. The test expects each of them to carry exactly one
    URL, none of kind "view", and expects the newer books to be absent from
    the result.
    """
    warehouse = create_warehouse()
    title = create_title(name="test_en_all")
    collection = create_collection(warehouse=warehouse)
    create_collection_title(title=title, collection=collection, path=Path(""))
    now = getnow()

    def make_prod_book(
        flavour: str, date: str, age_in_days: int, zim_filename: str
    ) -> Book:
        # Create a "prod" book attached to the shared title, together with a
        # "current" location in the shared warehouse.
        created = create_book(
            zim_metadata={"Name": title.name},
            created_at=now - datetime.timedelta(days=age_in_days),
            flavour=flavour,
            date=date,
        )
        created.title = title
        created.location_kind = "prod"
        title.books.append(created)
        create_book_location(
            book=created,
            warehouse_id=warehouse.id,
            path=Path(""),
            filename=zim_filename,
            status="current",
        )
        return created

    # Newest-dated book per flavour; deliberately left out of the query.
    latest_test = make_prod_book("test", "2023-01-02", 7, "test_test_latest.zim")
    latest_all = make_prod_book("all", "2023-01-02", 7, "test_all_latest.zim")

    # Older-dated book per flavour; these are the ones queried.
    older_test = make_prod_book("test", "2023-01-01", 14, "test_test_older.zim")
    older_all = make_prod_book("all", "2023-01-01", 14, "test_all_older.zim")

    dbsession.flush()

    # Only the older books are requested.
    result = get_zim_urls(dbsession, zim_ids=[older_test.id, older_all.id])

    queried = (older_test, older_all)
    not_queried = (latest_test, latest_all)

    for book in queried:
        assert book.id in result.urls
    for book in not_queried:
        assert book.id not in result.urls

    for book in queried:
        assert len(result.urls[book.id]) == 1

    for book in queried:
        assert not any(url.kind == "view" for url in result.urls[book.id])


@pytest.mark.parametrize(
"location_kind,warehouse_id,path",
[
Expand Down
Loading