From cccd35965d6f1349529e4b16cfe8b3e38d6c9a38 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Thu, 16 Apr 2026 10:41:54 -0400 Subject: [PATCH 01/17] feat: enable mypy session for documentai-toolbox --- .../noxfile.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/noxfile.py b/packages/google-cloud-documentai-toolbox/noxfile.py index 4d9415cb22e7..36729f34f270 100644 --- a/packages/google-cloud-documentai-toolbox/noxfile.py +++ b/packages/google-cloud-documentai-toolbox/noxfile.py @@ -480,10 +480,21 @@ def prerelease_deps(session, protobuf_implementation): @nox.session(python=DEFAULT_PYTHON_VERSION) def mypy(session): """Run the type checker.""" - - # TODO(https://github.com/googleapis/google-cloud-python/issues/16014): - # Enable mypy once this bug is fixed. - session.skip("Temporarily skip mypy. See issue 16014") + session.install( + "mypy<1.16.0", + "types-requests", + "types-protobuf", + "pandas-stubs", + ) + session.install("-e", ".") + session.run( + "mypy", + "-p", + "google.cloud.documentai_toolbox", + "--check-untyped-defs", + "--ignore-missing-imports", + *session.posargs, + ) @nox.session(python=DEFAULT_PYTHON_VERSION) From 6abd528bb3e3aa08849ff17b700bbd039ebe4de2 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 09:11:08 -0400 Subject: [PATCH 02/17] fix(documentai-toolbox): resolve mypy errors --- .../cloud/documentai_toolbox/__init__.py | 2 +- .../converters/config/bbox_conversion.py | 2 +- .../converters/config/block.py | 40 +++++++++---------- .../converters/converter.py | 2 +- .../converters/vision_helpers.py | 12 +++--- .../utilities/gcs_utilities.py | 3 +- .../documentai_toolbox/wrappers/document.py | 9 +++-- .../documentai_toolbox/wrappers/entity.py | 5 ++- .../cloud/documentai_toolbox/wrappers/page.py | 8 ++-- 9 files changed, 45 insertions(+), 38 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py index 83e23ba88552..781055edcae9 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py @@ -25,7 +25,7 @@ from .utilities import docai_utilities, gcs_utilities from .wrappers import document, entity, page -__all__ = (document, page, entity, converter, docai_utilities, gcs_utilities) +__all__ = ("document", "page", "entity", "converter", "docai_utilities", "gcs_utilities") class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index b256fea715d7..b523b4b0798c 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -190,7 +190,7 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: y_multiplier = 1.0 normalized_vertices: List[documentai.NormalizedVertex] = [] - if block.page_width and block.page_height: + if block.page_width and block.page_height and block.docproto_width is not None and block.docproto_height is not None: x_multiplier = _get_multiplier( docproto_coordinate=block.docproto_width, external_coordinate=block.page_width, diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 50fd63cea2a6..e2d0bcdc22eb 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -17,12 +17,12 @@ import dataclasses import json from types import SimpleNamespace -from typing import List, Optional, Type +from typing import Any, List, Optional, Type, cast from google.cloud import documentai -def _get_target_object(json_data: any, target_object: str) -> Optional[SimpleNamespace]: +def _get_target_object(json_data: Any, target_object: str) -> Any: r"""Returns SimpleNamespace of target_object. Args: @@ -72,45 +72,45 @@ class Block: page_number: Optional. """ - type_: SimpleNamespace = dataclasses.field(init=True, repr=False) - text: SimpleNamespace = dataclasses.field(init=True, repr=False) - bounding_box: Optional[SimpleNamespace] = dataclasses.field( + type_: Any = dataclasses.field(init=True, repr=False) + text: Any = dataclasses.field(init=True, repr=False) + bounding_box: Any = dataclasses.field( init=True, repr=False, default=None ) - block_references: Optional[SimpleNamespace] = dataclasses.field( + block_references: Any = dataclasses.field( init=True, repr=False, default=None ) - block_id: Optional[SimpleNamespace] = dataclasses.field( + block_id: Any = dataclasses.field( init=False, repr=False, default=None ) - confidence: Optional[SimpleNamespace] = dataclasses.field( + confidence: Any = dataclasses.field( init=False, repr=False, default=None ) - page_number: Optional[SimpleNamespace] = dataclasses.field( + page_number: Any = dataclasses.field( init=False, repr=False, default=None ) - page_width: Optional[SimpleNamespace] = dataclasses.field( + page_width: Any = dataclasses.field( init=False, repr=False, default=None ) - page_height: Optional[SimpleNamespace] = dataclasses.field( + page_height: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_width: Optional[SimpleNamespace] = dataclasses.field( + bounding_width: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_height: Optional[SimpleNamespace] = dataclasses.field( + bounding_height: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_type: Optional[SimpleNamespace] = dataclasses.field( + bounding_type: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_unit: Optional[SimpleNamespace] = dataclasses.field( + bounding_unit: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_x: Optional[SimpleNamespace] = dataclasses.field( + bounding_x: Any = dataclasses.field( init=False, repr=False, default=None ) - bounding_y: Optional[SimpleNamespace] = dataclasses.field( + bounding_y: Any = dataclasses.field( init=False, repr=False, default=None ) docproto_width: Optional[float] = dataclasses.field( @@ -180,7 +180,7 @@ def load_blocks_from_schema( blocks: List[Block] = [] ens = _get_target_object(objects, entities) - for i in ens: + for i in cast(Any, ens): entity = i block_text = "" @@ -203,11 +203,11 @@ def load_blocks_from_schema( b = Block( type_=block_type, text=block_text, - bounding_box=_get_target_object(entity, normalized_vertices), + bounding_box=_get_target_object(entity, normalized_vertices) if normalized_vertices is not None else None, ) if id_: - b.id_ = _get_target_object(entity, id_) + b.block_id = _get_target_object(entity, id_) if confidence: b.confidence = _get_target_object(entity, confidence) if page_number and page_number in entity: diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py index ffc7c1380632..e87b6fb8bca6 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/converter.py @@ -424,7 +424,7 @@ def convert_from_config( print("-------- Converting Started --------") files, labels, did_not_convert = _get_docproto_files( - futures_list, project_id, location, processor_id + list(futures_list), project_id, location, processor_id ) print("-------- Finished Converting --------") diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py index dbcfd74855e1..77f1c08e7fac 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/vision_helpers.py @@ -243,17 +243,17 @@ def _generate_entity_annotations( """ entity_annotations: List[EntityAnnotation] = [] for token in page_info.page.tokens: - v: vision.Vertex = [] + v: list[vision.Vertex] = [] if token.layout.bounding_poly.vertices: for vertex in token.layout.bounding_poly.vertices: - v.append({"x": int(vertex.x), "y": int(vertex.y)}) + v.append(vision.Vertex(x=int(vertex.x), y=int(vertex.y))) else: for normalized_vertex in token.layout.bounding_poly.normalized_vertices: v.append( - { - "x": int(normalized_vertex.x * page_info.page.dimension.width), - "y": int(normalized_vertex.y * page_info.page.dimension.height), - } + vision.Vertex( + x=int(normalized_vertex.x * page_info.page.dimension.width), + y=int(normalized_vertex.y * page_info.page.dimension.height), + ) ) text_start_index = token.layout.text_anchor.text_segments[0].start_index diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 4ed6e90d6fc3..65ed9a2a6fee 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -20,7 +20,7 @@ from google.api_core.gapic_v1 import client_info -from google.cloud import documentai, documentai_toolbox, storage +from google.cloud import documentai, documentai_toolbox, storage # type: ignore[attr-defined] from google.cloud.documentai_toolbox import constants @@ -91,6 +91,7 @@ def get_blobs( if gcs_uri: gcs_bucket_name, gcs_prefix = split_gcs_uri(gcs_uri) + assert gcs_prefix is not None if re.match(constants.FILE_CHECK_REGEX, gcs_prefix): raise ValueError("gcs_prefix cannot contain file types") diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py index 092cf2d9d307..9a811bee0b5f 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py @@ -22,7 +22,7 @@ import glob import os import re -from typing import Dict, Iterator, List, Optional, Type, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Type, Union from google.api_core.client_options import ClientOptions from google.api_core.operation import from_gapic as operation_from_gapic @@ -51,7 +51,7 @@ def _document_layout_blocks_from_shards( shards: List[documentai.Document], ) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]: def extract_blocks( - blocks: List[documentai.Document.DocumentLayout.DocumentLayoutBlock], + blocks: Iterable[documentai.Document.DocumentLayout.DocumentLayoutBlock], ) -> Iterator[documentai.Document.DocumentLayout.DocumentLayoutBlock]: queue = collections.deque(blocks) @@ -325,8 +325,9 @@ def _dict_to_bigquery( bq_client = bigquery.Client( project=project_id, client_info=gcs_utilities._get_client_info() ) + resolved_project_id = project_id or bq_client.project table_ref = bigquery.DatasetReference( - project=project_id, dataset_id=dataset_name + project=resolved_project_id, dataset_id=dataset_name ).table(table_name) job_config = bigquery.LoadJobConfig( @@ -345,7 +346,7 @@ def _dict_to_bigquery( def _apply_text_offset( - documentai_object: Union[Dict[str, Dict], List], text_offset: int + documentai_object: Union[Dict[str, Any], List[Any]], text_offset: int ) -> None: r"""Applies a text offset to all text_segments in `documentai_object`. diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py index 02b2e1ba306c..2057e32f32e6 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py @@ -63,7 +63,10 @@ class Entity: _image: Optional[Image.Image] = dataclasses.field(init=False, default=None) - def __post_init__(self, page_offset: int) -> None: + def __post_init__(self, page_offset: Optional[int]) -> None: + if page_offset is None: + page_offset = 0 + self.type_ = self.documentai_object.type_ if self.documentai_object.mention_text: diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py index 35a2491e634b..a73d688a6426 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py @@ -18,7 +18,9 @@ from abc import ABC import dataclasses from functools import cached_property -from typing import Iterable, List, Optional, Type +from typing import Iterable, List, Optional, Type, TypeVar + +T = TypeVar("T", bound="_BasePageElement") import pandas as pd @@ -181,8 +183,8 @@ def _text_segment(self) -> documentai.Document.TextAnchor.TextSegment: return self.documentai_object.layout.text_anchor.text_segments[0] def _get_children_of_element( - self, potential_children: List["_BasePageElement"] - ) -> List["_BasePageElement"]: + self, potential_children: List[T] + ) -> List[T]: """ Filters potential child elements to identify only those fully contained within this element. From cbbacda0f5731bbdb76a80be9fcb96bff366aef8 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 09:15:11 -0400 Subject: [PATCH 03/17] chore(documentai-toolbox): fix linting and formatting --- .../cloud/documentai_toolbox/__init__.py | 9 ++- .../converters/config/bbox_conversion.py | 7 ++- .../converters/config/block.py | 56 ++++++------------- .../utilities/gcs_utilities.py | 6 +- .../documentai_toolbox/wrappers/entity.py | 2 +- .../cloud/documentai_toolbox/wrappers/page.py | 7 +-- 6 files changed, 39 insertions(+), 48 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py index 781055edcae9..1c5cdba52088 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/__init__.py @@ -25,7 +25,14 @@ from .utilities import docai_utilities, gcs_utilities from .wrappers import document, entity, page -__all__ = ("document", "page", "entity", "converter", "docai_utilities", "gcs_utilities") +__all__ = ( + "document", + "page", + "entity", + "converter", + "docai_utilities", + "gcs_utilities", +) class Python37DeprecationWarning(DeprecationWarning): # pragma: NO COVER diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index b523b4b0798c..b25c1fbb769d 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -190,7 +190,12 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: y_multiplier = 1.0 normalized_vertices: List[documentai.NormalizedVertex] = [] - if block.page_width and block.page_height and block.docproto_width is not None and block.docproto_height is not None: + if ( + block.page_width + and block.page_height + and block.docproto_width is not None + and block.docproto_height is not None + ): x_multiplier = _get_multiplier( docproto_coordinate=block.docproto_width, external_coordinate=block.page_width, diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index e2d0bcdc22eb..effc0238668d 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -74,45 +74,19 @@ class Block: """ type_: Any = dataclasses.field(init=True, repr=False) text: Any = dataclasses.field(init=True, repr=False) - bounding_box: Any = dataclasses.field( - init=True, repr=False, default=None - ) - block_references: Any = dataclasses.field( - init=True, repr=False, default=None - ) - block_id: Any = dataclasses.field( - init=False, repr=False, default=None - ) - confidence: Any = dataclasses.field( - init=False, repr=False, default=None - ) - page_number: Any = dataclasses.field( - init=False, repr=False, default=None - ) - page_width: Any = dataclasses.field( - init=False, repr=False, default=None - ) - page_height: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_width: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_height: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_type: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_unit: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_x: Any = dataclasses.field( - init=False, repr=False, default=None - ) - bounding_y: Any = dataclasses.field( - init=False, repr=False, default=None - ) + bounding_box: Any = dataclasses.field(init=True, repr=False, default=None) + block_references: Any = dataclasses.field(init=True, repr=False, default=None) + block_id: Any = dataclasses.field(init=False, repr=False, default=None) + confidence: Any = dataclasses.field(init=False, repr=False, default=None) + page_number: Any = dataclasses.field(init=False, repr=False, default=None) + page_width: Any = dataclasses.field(init=False, repr=False, default=None) + page_height: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_width: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_height: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_type: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_unit: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_x: Any = dataclasses.field(init=False, repr=False, default=None) + bounding_y: Any = dataclasses.field(init=False, repr=False, default=None) docproto_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) @@ -203,7 +177,9 @@ def load_blocks_from_schema( b = Block( type_=block_type, text=block_text, - bounding_box=_get_target_object(entity, normalized_vertices) if normalized_vertices is not None else None, + bounding_box=_get_target_object(entity, normalized_vertices) + if normalized_vertices is not None + else None, ) if id_: diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 65ed9a2a6fee..8d6c2ac80337 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -20,7 +20,11 @@ from google.api_core.gapic_v1 import client_info -from google.cloud import documentai, documentai_toolbox, storage # type: ignore[attr-defined] +from google.cloud import ( + documentai, # type: ignore[attr-defined] + documentai_toolbox, + storage, +) from google.cloud.documentai_toolbox import constants diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py index 2057e32f32e6..f8e867f59d3c 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/entity.py @@ -66,7 +66,7 @@ class Entity: def __post_init__(self, page_offset: Optional[int]) -> None: if page_offset is None: page_offset = 0 - + self.type_ = self.documentai_object.type_ if self.documentai_object.mention_text: diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py index a73d688a6426..4d98de340b77 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py @@ -20,7 +20,6 @@ from functools import cached_property from typing import Iterable, List, Optional, Type, TypeVar -T = TypeVar("T", bound="_BasePageElement") import pandas as pd @@ -28,6 +27,8 @@ from google.cloud.documentai_toolbox.constants import ElementWithLayout from google.cloud.documentai_toolbox.utilities import docai_utilities +T = TypeVar("T", bound="_BasePageElement") + @dataclasses.dataclass class Table: @@ -182,9 +183,7 @@ def _text_segment(self) -> documentai.Document.TextAnchor.TextSegment: """ return self.documentai_object.layout.text_anchor.text_segments[0] - def _get_children_of_element( - self, potential_children: List[T] - ) -> List[T]: + def _get_children_of_element(self, potential_children: List[T]) -> List[T]: """ Filters potential child elements to identify only those fully contained within this element. From 20d9ae8a0dea830a9b220497b8c64f15fd523596 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 09:36:31 -0400 Subject: [PATCH 04/17] fix(documentai-toolbox): move type ignore to correct line in gcs_utilities --- .../cloud/documentai_toolbox/utilities/gcs_utilities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 8d6c2ac80337..0fd2970e98fe 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -20,8 +20,8 @@ from google.api_core.gapic_v1 import client_info -from google.cloud import ( - documentai, # type: ignore[attr-defined] +from google.cloud import ( # type: ignore[attr-defined] + documentai, documentai_toolbox, storage, ) From 67eb2981c6dc2dca66fb2302269e0bf39a3fb244 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 12:42:26 -0400 Subject: [PATCH 05/17] feat(documentai-toolbox): use ConfigOrData type alias for Block fields --- .../converters/config/block.py | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index effc0238668d..15a8eed9a592 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -17,10 +17,12 @@ import dataclasses import json from types import SimpleNamespace -from typing import Any, List, Optional, Type, cast +from typing import Any, List, Optional, Type, Union, cast from google.cloud import documentai +ConfigOrData = Union[SimpleNamespace, str, int, float, None] + def _get_target_object(json_data: Any, target_object: str) -> Any: r"""Returns SimpleNamespace of target_object. @@ -72,21 +74,21 @@ class Block: page_number: Optional. """ - type_: Any = dataclasses.field(init=True, repr=False) - text: Any = dataclasses.field(init=True, repr=False) - bounding_box: Any = dataclasses.field(init=True, repr=False, default=None) - block_references: Any = dataclasses.field(init=True, repr=False, default=None) - block_id: Any = dataclasses.field(init=False, repr=False, default=None) - confidence: Any = dataclasses.field(init=False, repr=False, default=None) - page_number: Any = dataclasses.field(init=False, repr=False, default=None) - page_width: Any = dataclasses.field(init=False, repr=False, default=None) - page_height: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_width: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_height: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_type: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_unit: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_x: Any = dataclasses.field(init=False, repr=False, default=None) - bounding_y: Any = dataclasses.field(init=False, repr=False, default=None) + type_: ConfigOrData = dataclasses.field(init=True, repr=False) + text: ConfigOrData = dataclasses.field(init=True, repr=False) + bounding_box: ConfigOrData = dataclasses.field(init=True, repr=False, default=None) + block_references: ConfigOrData = dataclasses.field(init=True, repr=False, default=None) + block_id: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + confidence: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + page_number: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + page_width: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + page_height: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_width: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_height: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_type: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_unit: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_x: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_y: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) docproto_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) From 3ca1496fabc8f72df9aa23a9fbd113511bacf1d5 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 12:45:08 -0400 Subject: [PATCH 06/17] feat(documentai-toolbox): update ConfigOrData to include List and use as return type for _get_target_object --- .../cloud/documentai_toolbox/converters/config/block.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 15a8eed9a592..f17f12699331 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -21,10 +21,10 @@ from google.cloud import documentai -ConfigOrData = Union[SimpleNamespace, str, int, float, None] +ConfigOrData = Union[SimpleNamespace, str, int, float, List[Any], None] -def _get_target_object(json_data: Any, target_object: str) -> Any: +def _get_target_object(json_data: Any, target_object: str) -> ConfigOrData: r"""Returns SimpleNamespace of target_object. Args: From c2c4e4e52632f7346f4f8dd6aaec2124b957c4de Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 12:46:41 -0400 Subject: [PATCH 07/17] fix(documentai-toolbox): replace assert with explicit check in gcs_utilities.py --- .../google/cloud/documentai_toolbox/utilities/gcs_utilities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 0fd2970e98fe..4c285d108241 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -95,7 +95,8 @@ def get_blobs( if gcs_uri: gcs_bucket_name, gcs_prefix = split_gcs_uri(gcs_uri) - assert gcs_prefix is not None + if gcs_prefix is None: + raise TypeError("gcs_prefix cannot be None") if re.match(constants.FILE_CHECK_REGEX, gcs_prefix): raise ValueError("gcs_prefix cannot contain file types") From 1dea66977d81c9b0f3a5780b5a0b92682da2e2d2 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 12:51:24 -0400 Subject: [PATCH 08/17] fix(documentai-toolbox): replace redundant cast with isinstance check in block.py --- .../cloud/documentai_toolbox/converters/config/block.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index f17f12699331..6e1cd1fc8a73 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -156,7 +156,9 @@ def load_blocks_from_schema( blocks: List[Block] = [] ens = _get_target_object(objects, entities) - for i in cast(Any, ens): + if not isinstance(ens, list): + raise TypeError("Expected list for entities") + for i in ens: entity = i block_text = "" From 5c1fe1182ed711777ccd45fb7b8a55f1ea6151c0 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 12:52:48 -0400 Subject: [PATCH 09/17] style: blacken block.py --- .../converters/config/block.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 6e1cd1fc8a73..17001a895434 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -77,16 +77,26 @@ class Block: type_: ConfigOrData = dataclasses.field(init=True, repr=False) text: ConfigOrData = dataclasses.field(init=True, repr=False) bounding_box: ConfigOrData = dataclasses.field(init=True, repr=False, default=None) - block_references: ConfigOrData = dataclasses.field(init=True, repr=False, default=None) + block_references: ConfigOrData = dataclasses.field( + init=True, repr=False, default=None + ) block_id: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) confidence: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) page_number: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) page_width: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) page_height: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_width: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_height: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_type: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_unit: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_width: ConfigOrData = dataclasses.field( + init=False, repr=False, default=None + ) + bounding_height: ConfigOrData = dataclasses.field( + init=False, repr=False, default=None + ) + bounding_type: ConfigOrData = dataclasses.field( + init=False, repr=False, default=None + ) + bounding_unit: ConfigOrData = dataclasses.field( + init=False, repr=False, default=None + ) bounding_x: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) bounding_y: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) docproto_width: Optional[float] = dataclasses.field( From 94a5f3e4a08f3172295b75db61909dfb942fecb7 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:04:53 -0400 Subject: [PATCH 10/17] fix(documentai-toolbox): use specific types for Block fields and revert _get_target_object return type --- .../converters/config/block.py | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 17001a895434..325e60129dfb 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -21,10 +21,7 @@ from google.cloud import documentai -ConfigOrData = Union[SimpleNamespace, str, int, float, List[Any], None] - - -def _get_target_object(json_data: Any, target_object: str) -> ConfigOrData: +def _get_target_object(json_data: Any, target_object: str) -> Any: r"""Returns SimpleNamespace of target_object. Args: @@ -74,31 +71,31 @@ class Block: page_number: Optional. """ - type_: ConfigOrData = dataclasses.field(init=True, repr=False) - text: ConfigOrData = dataclasses.field(init=True, repr=False) - bounding_box: ConfigOrData = dataclasses.field(init=True, repr=False, default=None) - block_references: ConfigOrData = dataclasses.field( + type_: Any = dataclasses.field(init=True, repr=False) + text: str = dataclasses.field(init=True, repr=False) + bounding_box: Optional[SimpleNamespace] = dataclasses.field(init=True, repr=False, default=None) + block_references: Any = dataclasses.field( init=True, repr=False, default=None ) - block_id: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - confidence: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - page_number: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - page_width: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - page_height: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_width: ConfigOrData = dataclasses.field( + block_id: Optional[str] = dataclasses.field(init=False, repr=False, default=None) + confidence: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + page_number: Optional[int] = dataclasses.field(init=False, repr=False, default=None) + page_width: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + page_height: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + bounding_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) - bounding_height: ConfigOrData = dataclasses.field( + bounding_height: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) - bounding_type: ConfigOrData = dataclasses.field( + bounding_type: Optional[str] = dataclasses.field( init=False, repr=False, default=None ) - bounding_unit: ConfigOrData = dataclasses.field( + bounding_unit: Optional[str] = dataclasses.field( init=False, repr=False, default=None ) - bounding_x: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) - bounding_y: ConfigOrData = dataclasses.field(init=False, repr=False, default=None) + bounding_x: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + bounding_y: Optional[float] = dataclasses.field(init=False, repr=False, default=None) docproto_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) From 1a972f05aa926115c58d6652ef6cd8a0d3a880c6 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:07:49 -0400 Subject: [PATCH 11/17] fix(documentai-toolbox): fix mypy errors in block.py and bbox_conversion.py by narrowing types --- .../converters/config/bbox_conversion.py | 24 +++++++++++-------- .../converters/config/block.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index b25c1fbb769d..1657f23ada82 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -199,12 +199,12 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: x_multiplier = _get_multiplier( docproto_coordinate=block.docproto_width, external_coordinate=block.page_width, - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", ) y_multiplier = _get_multiplier( docproto_coordinate=block.docproto_height, external_coordinate=block.page_height, - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", ) if block.bounding_type == "1": @@ -213,13 +213,13 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: for coordinate in block.bounding_box: x = _convert_bbox_units( coordinate[f"{block.bounding_x}"], - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", width=block.docproto_width, multiplier=x_multiplier, ) y = _convert_bbox_units( coordinate[f"{block.bounding_y}"], - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", height=block.docproto_height, multiplier=y_multiplier, ) @@ -229,15 +229,17 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: elif block.bounding_type == "2": # Type 2 : bounding box has 1 (x,y) coordinates for the top left corner # and (width, height) + if not isinstance(block.bounding_box, SimpleNamespace): + raise TypeError("Expected SimpleNamespace for bounding_box in Type 2") x_min = _convert_bbox_units( - block.bounding_box[f"{block.bounding_x}"], - input_bbox_units=block.bounding_unit, + getattr(block.bounding_box, f"{block.bounding_x}"), + input_bbox_units=block.bounding_unit or "normalized", width=block.page_width, multiplier=x_multiplier, ) y_min = _convert_bbox_units( - block.bounding_box[f"{block.bounding_y}"], - input_bbox_units=block.bounding_unit, + getattr(block.bounding_box, f"{block.bounding_y}"), + input_bbox_units=block.bounding_unit or "normalized", width=block.page_height, multiplier=y_multiplier, ) @@ -254,16 +256,18 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: elif block.bounding_type == "3": # Type 3 : bounding_box: [x1, y1, x2, y2, x3, y3, x4, y4] + if not isinstance(block.bounding_box, list): + raise TypeError("Expected list for bounding_box in Type 3") for idx in range(0, len(block.bounding_box), 2): x = _convert_bbox_units( block.bounding_box[idx], - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", width=block.docproto_width, multiplier=x_multiplier, ) y = _convert_bbox_units( block.bounding_box[idx + 1], - input_bbox_units=block.bounding_unit, + input_bbox_units=block.bounding_unit or "normalized", width=block.docproto_height, multiplier=y_multiplier, ) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 325e60129dfb..2ddb7108c4e5 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -73,7 +73,7 @@ class Block: """ type_: Any = dataclasses.field(init=True, repr=False) text: str = dataclasses.field(init=True, repr=False) - bounding_box: Optional[SimpleNamespace] = dataclasses.field(init=True, repr=False, default=None) + bounding_box: Optional[Union[SimpleNamespace, List[Any]]] = dataclasses.field(init=True, repr=False, default=None) block_references: Any = dataclasses.field( init=True, repr=False, default=None ) From 7126d341193780e93b7c0fcbea7f0db0110436ab Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:09:07 -0400 Subject: [PATCH 12/17] fix(documentai-toolbox): fix remaining mypy errors in bbox_conversion.py --- .../documentai_toolbox/converters/config/bbox_conversion.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index 1657f23ada82..7d55953daf85 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -14,6 +14,7 @@ # limitations under the License. # +from types import SimpleNamespace from typing import Callable, List, Optional from intervaltree import intervaltree @@ -243,6 +244,8 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: width=block.page_height, multiplier=y_multiplier, ) + if block.bounding_width is None or block.bounding_height is None: + raise ValueError("bounding_width and bounding_height must be set for Type 2") x_max = x_min + block.bounding_width y_max = y_min + block.bounding_height normalized_vertices.extend( From f8312eb3276b7387659b71667b5135aeaa0ca33a Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:10:57 -0400 Subject: [PATCH 13/17] style: blacken and format documentai-toolbox files --- .../converters/config/bbox_conversion.py | 4 ++- .../converters/config/block.py | 27 +++++++++++++------ .../utilities/gcs_utilities.py | 4 +-- .../cloud/documentai_toolbox/wrappers/page.py | 1 - 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index 7d55953daf85..7c656a2be233 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -245,7 +245,9 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: multiplier=y_multiplier, ) if block.bounding_width is None or block.bounding_height is None: - raise ValueError("bounding_width and bounding_height must be set for Type 2") + raise ValueError( + "bounding_width and bounding_height must be set for Type 2" + ) x_max = x_min + block.bounding_width y_max = y_min + block.bounding_height normalized_vertices.extend( diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index 2ddb7108c4e5..aba77ca22114 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -17,10 +17,11 @@ import dataclasses import json from types import SimpleNamespace -from typing import Any, List, Optional, Type, Union, cast +from typing import Any, List, Optional, Type, Union from google.cloud import documentai + def _get_target_object(json_data: Any, target_object: str) -> Any: r"""Returns SimpleNamespace of target_object. @@ -73,15 +74,21 @@ class Block: """ type_: Any = dataclasses.field(init=True, repr=False) text: str = dataclasses.field(init=True, repr=False) - bounding_box: Optional[Union[SimpleNamespace, List[Any]]] = dataclasses.field(init=True, repr=False, default=None) - block_references: Any = dataclasses.field( + bounding_box: Optional[Union[SimpleNamespace, List[Any]]] = dataclasses.field( init=True, repr=False, default=None ) + block_references: Any = dataclasses.field(init=True, repr=False, default=None) block_id: Optional[str] = dataclasses.field(init=False, repr=False, default=None) - confidence: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + confidence: Optional[float] = dataclasses.field( + init=False, repr=False, default=None + ) page_number: Optional[int] = dataclasses.field(init=False, repr=False, default=None) - page_width: Optional[float] = dataclasses.field(init=False, repr=False, default=None) - page_height: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + page_width: Optional[float] = dataclasses.field( + init=False, repr=False, default=None + ) + page_height: Optional[float] = dataclasses.field( + init=False, repr=False, default=None + ) bounding_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) @@ -94,8 +101,12 @@ class Block: bounding_unit: Optional[str] = dataclasses.field( init=False, repr=False, default=None ) - bounding_x: Optional[float] = dataclasses.field(init=False, repr=False, default=None) - bounding_y: Optional[float] = dataclasses.field(init=False, repr=False, default=None) + bounding_x: Optional[float] = dataclasses.field( + init=False, repr=False, default=None + ) + bounding_y: Optional[float] = dataclasses.field( + init=False, repr=False, default=None + ) docproto_width: Optional[float] = dataclasses.field( init=False, repr=False, default=None ) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 4c285d108241..93700ca3156d 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -20,8 +20,8 @@ from google.api_core.gapic_v1 import client_info -from google.cloud import ( # type: ignore[attr-defined] - documentai, +from google.cloud import ( + documentai, # type: ignore[attr-defined] documentai_toolbox, storage, ) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py index 4d98de340b77..3dd9b9d68876 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/page.py @@ -20,7 +20,6 @@ from functools import cached_property from typing import Iterable, List, Optional, Type, TypeVar - import pandas as pd from google.cloud import documentai From bddf806b5942fe69e48e45c94949c77c9d5c09da Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:43:22 -0400 Subject: [PATCH 14/17] fix(documentai-toolbox): expect dict for bounding_box in Type 2 in bbox_conversion.py --- .../converters/config/bbox_conversion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index 7c656a2be233..10df4b66425f 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -230,16 +230,16 @@ def convert_bbox_to_docproto_bbox(block: Block) -> documentai.BoundingPoly: elif block.bounding_type == "2": # Type 2 : bounding box has 1 (x,y) coordinates for the top left corner # and (width, height) - if not isinstance(block.bounding_box, SimpleNamespace): - raise TypeError("Expected SimpleNamespace for bounding_box in Type 2") + if not isinstance(block.bounding_box, dict): + raise TypeError("Expected dict for bounding_box in Type 2") x_min = _convert_bbox_units( - getattr(block.bounding_box, f"{block.bounding_x}"), + block.bounding_box[f"{block.bounding_x}"], input_bbox_units=block.bounding_unit or "normalized", width=block.page_width, multiplier=x_multiplier, ) y_min = _convert_bbox_units( - getattr(block.bounding_box, f"{block.bounding_y}"), + block.bounding_box[f"{block.bounding_y}"], input_bbox_units=block.bounding_unit or "normalized", width=block.page_height, multiplier=y_multiplier, From 49a17b1e5798165d39a87df9ddc656df7541df9d Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:43:33 -0400 Subject: [PATCH 15/17] fix(documentai-toolbox): allow both list and dict for entities in block.py --- .../cloud/documentai_toolbox/converters/config/block.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py index aba77ca22114..2fbf97237a0c 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/block.py @@ -174,8 +174,8 @@ def load_blocks_from_schema( blocks: List[Block] = [] ens = _get_target_object(objects, entities) - if not isinstance(ens, list): - raise TypeError("Expected list for entities") + if not isinstance(ens, (list, dict)): + raise TypeError("Expected list or dict for entities") for i in ens: entity = i From 410c1adce3931dce5cea3a3083b56b2b3e5d0bae Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:46:08 -0400 Subject: [PATCH 16/17] fix(documentai-toolbox): fix mypy error for storage import in gcs_utilities.py --- .../cloud/documentai_toolbox/utilities/gcs_utilities.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py index 93700ca3156d..3f2c8a487c22 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/utilities/gcs_utilities.py @@ -20,11 +20,9 @@ from google.api_core.gapic_v1 import client_info -from google.cloud import ( - documentai, # type: ignore[attr-defined] - documentai_toolbox, - storage, -) +from google.cloud import documentai # type: ignore[attr-defined] +from google.cloud import documentai_toolbox +from google.cloud import storage # type: ignore[attr-defined] from google.cloud.documentai_toolbox import constants From ff8695d807a029875b812745a9979f29603046a2 Mon Sep 17 00:00:00 2001 From: chalmer lowe Date: Fri, 17 Apr 2026 13:48:11 -0400 Subject: [PATCH 17/17] fix(lint): remove unused SimpleNamespace import in bbox_conversion.py --- .../documentai_toolbox/converters/config/bbox_conversion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py index 10df4b66425f..fe3a45a9328b 100644 --- a/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py +++ b/packages/google-cloud-documentai-toolbox/google/cloud/documentai_toolbox/converters/config/bbox_conversion.py @@ -14,7 +14,7 @@ # limitations under the License. # -from types import SimpleNamespace + from typing import Callable, List, Optional from intervaltree import intervaltree