diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 69f759d8463d..91226bd08ee3 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", "pr": "38069", - "modification": 40 + "modification": 41 } diff --git a/sdks/python/apache_beam/coders/coder_impl.py b/sdks/python/apache_beam/coders/coder_impl.py index cd66a0c09e01..0bded25e05d2 100644 --- a/sdks/python/apache_beam/coders/coder_impl.py +++ b/sdks/python/apache_beam/coders/coder_impl.py @@ -30,6 +30,7 @@ """ # pytype: skip-file +# ruff: noqa: UP006 import dataclasses import decimal import enum diff --git a/sdks/python/apache_beam/coders/coders.py b/sdks/python/apache_beam/coders/coders.py index 556b18043189..5f22bff5351b 100644 --- a/sdks/python/apache_beam/coders/coders.py +++ b/sdks/python/apache_beam/coders/coders.py @@ -43,13 +43,9 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Dict from typing import Iterable -from typing import List from typing import Optional from typing import Sequence -from typing import Tuple -from typing import Type from typing import TypeVar from typing import overload @@ -68,7 +64,6 @@ from apache_beam.utils import windowed_value if TYPE_CHECKING: - from apache_beam.coders.typecoders import CoderRegistry from apache_beam.runners.pipeline_context import PipelineContext # pylint: disable=wrong-import-order, wrong-import-position, ungrouped-imports @@ -122,7 +117,7 @@ T = TypeVar('T') CoderT = TypeVar('CoderT', bound='Coder') ProtoCoderT = TypeVar('ProtoCoderT', bound='ProtoCoder') -ConstructorFn = Callable[[Optional[Any], List['Coder'], 'PipelineContext'], Any] +ConstructorFn = Callable[[Optional[Any], list['Coder'], 'PipelineContext'], Any] def serialize_coder(coder): @@ -1508,7 +1503,7 @@ def __hash__(self): class _OrderedUnionCoder(FastCoder): def __init__( - self, *coder_types: Tuple[type, Coder], fallback_coder: Optional[Coder]): + self, *coder_types: tuple[type, Coder], fallback_coder: Optional[Coder]): self._coder_types = coder_types self._fallback_coder = fallback_coder @@ -1816,7 +1811,7 @@ def _create_impl(self): def to_type_hint(self): return self._window_coder.to_type_hint() - def _get_component_coders(self) -> List[Coder]: + def _get_component_coders(self) -> list[Coder]: return [self._window_coder] def is_deterministic(self) -> bool: diff --git a/sdks/python/apache_beam/coders/coders_test_common.py b/sdks/python/apache_beam/coders/coders_test_common.py index 5e5cfc8a5b62..422d494b61c7 100644 --- a/sdks/python/apache_beam/coders/coders_test_common.py +++ b/sdks/python/apache_beam/coders/coders_test_common.py @@ -31,7 +31,6 @@ import unittest from decimal import Decimal from typing import Any -from typing import List from typing import NamedTuple import pytest @@ -145,7 +144,7 @@ class CodersTest(unittest.TestCase): # nested and unnested context. # Common test values representing Python's built-in types. 
- test_values_deterministic: List[Any] = [ + test_values_deterministic: list[Any] = [ None, 1, -1, diff --git a/sdks/python/apache_beam/coders/observable_test.py b/sdks/python/apache_beam/coders/observable_test.py index df4e7ef09408..ffe2ac8fed0e 100644 --- a/sdks/python/apache_beam/coders/observable_test.py +++ b/sdks/python/apache_beam/coders/observable_test.py @@ -20,7 +20,6 @@ import logging import unittest -from typing import List from typing import Optional from apache_beam.coders import observable @@ -29,7 +28,7 @@ class ObservableMixinTest(unittest.TestCase): observed_count = 0 observed_sum = 0 - observed_keys: List[Optional[str]] = [] + observed_keys: list[Optional[str]] = [] def observer(self, value, key=None): self.observed_count += 1 diff --git a/sdks/python/apache_beam/coders/row_coder_test.py b/sdks/python/apache_beam/coders/row_coder_test.py index c2176e99a999..396cdc9b3f66 100644 --- a/sdks/python/apache_beam/coders/row_coder_test.py +++ b/sdks/python/apache_beam/coders/row_coder_test.py @@ -43,7 +43,7 @@ ("name", str), ("age", np.int32), ("address", typing.Optional[str]), - ("aliases", typing.List[str]), + ("aliases", list[str]), ("knows_javascript", bool), ("payload", typing.Optional[bytes]), ("custom_metadata", typing.Mapping[str, int]), @@ -53,7 +53,7 @@ NullablePerson = typing.NamedTuple( "NullablePerson", [("name", typing.Optional[str]), ("age", np.int32), - ("address", typing.Optional[str]), ("aliases", typing.List[str]), + ("address", typing.Optional[str]), ("aliases", list[str]), ("knows_javascript", bool), ("payload", typing.Optional[bytes]), ("custom_metadata", typing.Mapping[str, int]), ("favorite_time", typing.Optional[Timestamp]), diff --git a/sdks/python/apache_beam/coders/slow_stream.py b/sdks/python/apache_beam/coders/slow_stream.py index fb4aa50f233d..c1843bbe957d 100644 --- a/sdks/python/apache_beam/coders/slow_stream.py +++ b/sdks/python/apache_beam/coders/slow_stream.py @@ -22,7 +22,6 @@ # pytype: skip-file import struct -from typing import List class OutputStream(object): @@ -30,7 +29,7 @@ class OutputStream(object): A pure Python implementation of stream.OutputStream.""" def __init__(self): - self.data: List[bytes] = [] + self.data: list[bytes] = [] self.byte_count = 0 def write(self, b: bytes, nested: bool = False) -> None: diff --git a/sdks/python/apache_beam/coders/standard_coders_test.py b/sdks/python/apache_beam/coders/standard_coders_test.py index b6f0dbf12208..e2301ada19ea 100644 --- a/sdks/python/apache_beam/coders/standard_coders_test.py +++ b/sdks/python/apache_beam/coders/standard_coders_test.py @@ -26,8 +26,6 @@ import sys import unittest from copy import deepcopy -from typing import Dict -from typing import Tuple import numpy as np import yaml @@ -283,7 +281,7 @@ def json_value_parser(self, coder_spec): # Used when --fix is passed. 
fix = False - to_fix: Dict[Tuple[int, bytes], bytes] = {} + to_fix: dict[tuple[int, bytes], bytes] = {} @classmethod def tearDownClass(cls): diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py index 9ecb14c60f75..cf594475c87d 100644 --- a/sdks/python/apache_beam/coders/typecoders.py +++ b/sdks/python/apache_beam/coders/typecoders.py @@ -66,10 +66,7 @@ def MakeXyzs(v): # pytype: skip-file from typing import Any -from typing import Dict from typing import Iterable -from typing import List -from typing import Type from apache_beam.coders import coders from apache_beam.typehints import typehints @@ -81,8 +78,8 @@ def MakeXyzs(v): class CoderRegistry(object): """A coder registry for typehint/coder associations.""" def __init__(self, fallback_coder=None): - self._coders: Dict[Any, Type[coders.Coder]] = {} - self.custom_types: List[Any] = [] + self._coders: dict[Any, type[coders.Coder]] = {} + self.custom_types: list[Any] = [] self.register_standard_coders(fallback_coder) def register_standard_coders(self, fallback_coder): @@ -110,7 +107,7 @@ def register_fallback_coder(self, fallback_coder): def _register_coder_internal( self, typehint_type: Any, - typehint_coder_class: Type[coders.Coder]) -> None: + typehint_coder_class: type[coders.Coder]) -> None: self._coders[typehint_type] = typehint_coder_class @staticmethod @@ -123,7 +120,7 @@ def _normalize_typehint_type(typehint_type): def register_coder( self, typehint_type: Any, - typehint_coder_class: Type[coders.Coder]) -> None: + typehint_coder_class: type[coders.Coder]) -> None: """ Register a user type with a coder. @@ -244,7 +241,7 @@ class FirstOf(object): """For internal use only; no backwards-compatibility guarantees. A class used to get the first matching coder from a list of coders.""" - def __init__(self, coders: Iterable[Type[coders.Coder]]) -> None: + def __init__(self, coders: Iterable[type[coders.Coder]]) -> None: self._coders = coders def from_type_hint(self, typehint, registry): diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py index 8e206fc5e037..f5bf024b177b 100644 --- a/sdks/python/apache_beam/dataframe/frame_base.py +++ b/sdks/python/apache_beam/dataframe/frame_base.py @@ -25,7 +25,6 @@ from inspect import unwrap from typing import Any from typing import Optional -from typing import Tuple from typing import Union import pandas as pd @@ -163,7 +162,7 @@ def binop(self, other): DeferredBase._pandas_type_map[None] = _DeferredScalar -def name_and_func(method: Union[str, Callable]) -> Tuple[str, Callable]: +def name_and_func(method: Union[str, Callable]) -> tuple[str, Callable]: """For the given method name or method, return the method name and the method itself. 
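Every hunk in this diff follows the same mechanical pattern: deprecated `typing` aliases (`List`, `Dict`, `Tuple`, `Type`, and friends) are replaced with the builtin generics standardized by PEP 585, which are valid in annotations on Python 3.9 and newer, and the now-unused `typing` imports are dropped. A minimal sketch of the rewrite under that assumption (the `index_by_length` function is illustrative, not taken from this diff):

    # Before: aliases imported from typing.
    from typing import Dict, List, Tuple

    def index_by_length(words: List[str]) -> Dict[int, Tuple[str, ...]]:
        out: Dict[int, Tuple[str, ...]] = {}
        for word in words:
            out[len(word)] = out.get(len(word), ()) + (word, )
        return out

    # After: builtin generics per PEP 585; the typing imports disappear.
    def index_by_length(words: list[str]) -> dict[int, tuple[str, ...]]:
        out: dict[int, tuple[str, ...]] = {}
        for word in words:
            out[len(word)] = out.get(len(word), ()) + (word, )
        return out

The two spellings are interchangeable at runtime as well as under a type checker, which is why the substitution can be applied wholesale.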
diff --git a/sdks/python/apache_beam/dataframe/schemas_test.py b/sdks/python/apache_beam/dataframe/schemas_test.py index 4c196e29e712..135d9ca21b3d 100644 --- a/sdks/python/apache_beam/dataframe/schemas_test.py +++ b/sdks/python/apache_beam/dataframe/schemas_test.py @@ -64,57 +64,36 @@ def check_df_pcoll_equal(actual): # pd.Series([b'abc'], dtype=bytes).dtype != 'S' # pd.Series([b'abc'], dtype=bytes).astype(bytes).dtype == 'S' # (test data, pandas_type, column_name, beam_type) -COLUMNS: typing.List[typing.Tuple[typing.List[typing.Any], - typing.Any, - str, - typing.Any]] = [ - ([375, 24, 0, 10, 16], - np.int32, - 'i32', - np.int32), - ([375, 24, 0, 10, 16], - np.int64, - 'i64', - np.int64), - ([375, 24, None, 10, 16], - pd.Int32Dtype(), - 'i32_nullable', - typing.Optional[np.int32]), - ([375, 24, None, 10, 16], - pd.Int64Dtype(), - 'i64_nullable', - typing.Optional[np.int64]), - ([375., 24., None, 10., 16.], - np.float64, - 'f64', - typing.Optional[np.float64]), - ([375., 24., None, 10., 16.], - np.float32, - 'f32', - typing.Optional[np.float32]), - ([True, False, True, True, False], - bool, - 'bool', - bool), - (['Falcon', 'Ostrich', None, 3.14, 0], - object, - 'any', - typing.Any), - ([True, False, True, None, False], - pd.BooleanDtype(), - 'bool_nullable', - typing.Optional[bool]), - ([ - 'Falcon', - 'Ostrich', - None, - 'Aardvark', - 'Elephant' - ], - pd.StringDtype(), - 'strdtype', - typing.Optional[str]), - ] +COLUMNS: list[tuple[list[typing.Any], typing.Any, str, typing.Any]] = [ + ([375, 24, 0, 10, 16], np.int32, 'i32', np.int32), + ([375, 24, 0, 10, 16], np.int64, 'i64', np.int64), + ([375, 24, None, 10, 16], + pd.Int32Dtype(), + 'i32_nullable', + typing.Optional[np.int32]), + ([375, 24, None, 10, 16], + pd.Int64Dtype(), + 'i64_nullable', + typing.Optional[np.int64]), + ([375., 24., None, 10., 16.], + np.float64, + 'f64', + typing.Optional[np.float64]), + ([375., 24., None, 10., 16.], + np.float32, + 'f32', + typing.Optional[np.float32]), + ([True, False, True, True, False], bool, 'bool', bool), + (['Falcon', 'Ostrich', None, 3.14, 0], object, 'any', typing.Any), + ([True, False, True, None, False], + pd.BooleanDtype(), + 'bool_nullable', + typing.Optional[bool]), + (['Falcon', 'Ostrich', None, 'Aardvark', 'Elephant'], + pd.StringDtype(), + 'strdtype', + typing.Optional[str]), +] NICE_TYPES_DF = pd.DataFrame(columns=[name for _, _, name, _ in COLUMNS]) for arr, dtype, name, _ in COLUMNS: @@ -125,9 +104,7 @@ def check_df_pcoll_equal(actual): SERIES_TESTS = [(pd.Series(arr, dtype=dtype, name=name), arr, beam_type) for (arr, dtype, name, beam_type) in COLUMNS] -_TEST_ARRAYS: typing.List[typing.List[typing.Any]] = [ - arr for (arr, _, _, _) in COLUMNS -] +_TEST_ARRAYS: list[list[typing.Any]] = [arr for (arr, _, _, _) in COLUMNS] DF_RESULT = list(zip(*_TEST_ARRAYS)) BEAM_SCHEMA = typing.NamedTuple( # type: ignore 'BEAM_SCHEMA', [(name, beam_type) for _, _, name, beam_type in COLUMNS]) diff --git a/sdks/python/apache_beam/examples/inference/multi_language_inference/multi_language_custom_transform/multi_language_custom_transform/composite_transform.py b/sdks/python/apache_beam/examples/inference/multi_language_inference/multi_language_custom_transform/multi_language_custom_transform/composite_transform.py index 3e1794bc5829..8fbfed085573 100644 --- a/sdks/python/apache_beam/examples/inference/multi_language_inference/multi_language_custom_transform/multi_language_custom_transform/composite_transform.py +++ 
b/sdks/python/apache_beam/examples/inference/multi_language_inference/multi_language_custom_transform/multi_language_custom_transform/composite_transform.py @@ -15,7 +15,6 @@ # import logging -import signal import typing import apache_beam as beam @@ -84,7 +83,7 @@ def __init__(self, bert_tokenizer): self.bert_tokenizer = bert_tokenizer logging.info('Starting Postprocess') - def process(self, element: typing.Tuple[str, PredictionResult]) \ + def process(self, element: tuple[str, PredictionResult]) \ -> typing.Iterable[str]: text, prediction_result = element inputs = prediction_result.example diff --git a/sdks/python/apache_beam/examples/inference/tfx_bsl/build_tensorflow_model.py b/sdks/python/apache_beam/examples/inference/tfx_bsl/build_tensorflow_model.py index 9230f84955eb..a8541c01a410 100644 --- a/sdks/python/apache_beam/examples/inference/tfx_bsl/build_tensorflow_model.py +++ b/sdks/python/apache_beam/examples/inference/tfx_bsl/build_tensorflow_model.py @@ -17,7 +17,6 @@ # Intended only for internal testing. -from typing import Dict from typing import Optional import tensorflow as tf @@ -114,7 +113,7 @@ def save_tf_model_with_signature( model=None, preprocess_input=None, input_dtype=tf.float32, - feature_description: Optional[Dict] = None, + feature_description: Optional[dict] = None, **kwargs, ): """ diff --git a/sdks/python/apache_beam/examples/snippets/snippets_test.py b/sdks/python/apache_beam/examples/snippets/snippets_test.py index d7dd5e6af191..e4f2d16ac144 100644 --- a/sdks/python/apache_beam/examples/snippets/snippets_test.py +++ b/sdks/python/apache_beam/examples/snippets/snippets_test.py @@ -333,12 +333,12 @@ def process(self, element): # One can assert outputs and apply them to transforms as well. # Helps document the contract and checks it at pipeline construction time. # [START type_hints_transform] - from typing import Tuple, TypeVar + from typing import TypeVar T = TypeVar('T') @beam.typehints.with_input_types(T) - @beam.typehints.with_output_types(Tuple[int, T]) + @beam.typehints.with_output_types(tuple[int, T]) class MyTransform(beam.PTransform): def expand(self, pcoll): return pcoll | beam.Map(lambda x: (len(x), x)) @@ -351,7 +351,7 @@ def expand(self, pcoll): # pylint: disable=expression-not-assigned with self.assertRaises(typehints.TypeCheckError): - words_with_lens | beam.Map(lambda x: x).with_input_types(Tuple[int, int]) + words_with_lens | beam.Map(lambda x: x).with_input_types(tuple[int, int]) def test_bad_types_annotations(self): p = TestPipeline(options=PipelineOptions(pipeline_type_check=True)) @@ -394,10 +394,10 @@ def process(self, element: int) -> Iterable[int]: # annotation has an additional Optional for the else clause. 
with self.assertRaises(typehints.TypeCheckError): # [START type_hints_do_fn_annotations_optional] - from typing import List, Optional + from typing import Optional class FilterEvensDoubleDoFn(beam.DoFn): - def process(self, element: int) -> Optional[List[int]]: + def process(self, element: int) -> Optional[list[int]]: if element % 2 == 0: return [element, element] return None @@ -461,7 +461,6 @@ def test_deterministic_key(self): global Player # pylint: disable=global-variable-not-assigned # [START type_hints_deterministic_key] - from typing import Tuple class Player(object): def __init__(self, team, name): @@ -487,7 +486,7 @@ def parse_player_and_score(csv): totals = ( lines | beam.Map(parse_player_and_score) - | beam.CombinePerKey(sum).with_input_types(Tuple[Player, int])) + | beam.CombinePerKey(sum).with_input_types(tuple[Player, int])) # [END type_hints_deterministic_key] assert_that( diff --git a/sdks/python/apache_beam/internal/dill_pickler.py b/sdks/python/apache_beam/internal/dill_pickler.py index e88cb3c1e138..60e309ae3a6b 100644 --- a/sdks/python/apache_beam/internal/dill_pickler.py +++ b/sdks/python/apache_beam/internal/dill_pickler.py @@ -39,8 +39,6 @@ import types import zlib from typing import Any -from typing import Dict -from typing import Tuple import dill @@ -50,7 +48,7 @@ settings = {'dill_byref': None} -patch_save_code = sys.version_info >= (3, 10) and dill.__version__ == "0.3.1.1" +patch_save_code = dill.__version__ == "0.3.1.1" if patch_save_code: # The following function is based on 'save_code' from 'dill' @@ -315,7 +313,7 @@ def save_module(pickler, obj): # Pickle module dictionaries (commonly found in lambda's globals) # by referencing their module. old_save_module_dict = dill.dill.save_module_dict - known_module_dicts: Dict[int, Tuple[types.ModuleType, Dict[str, Any]]] = {} + known_module_dicts: dict[int, tuple[types.ModuleType, dict[str, Any]]] = {} @dill.dill.register(dict) def new_save_module_dict(pickler, obj): diff --git a/sdks/python/apache_beam/internal/metrics/metric.py b/sdks/python/apache_beam/internal/metrics/metric.py index 6f6788e059bd..85d63c2b6f6b 100644 --- a/sdks/python/apache_beam/internal/metrics/metric.py +++ b/sdks/python/apache_beam/internal/metrics/metric.py @@ -30,9 +30,7 @@ import threading import time from typing import TYPE_CHECKING -from typing import Dict from typing import Optional -from typing import Type from typing import Union from apache_beam.metrics import monitoring_infos @@ -59,7 +57,7 @@ class Metrics(object): @staticmethod def counter( urn: str, - labels: Optional[Dict[str, str]] = None, + labels: Optional[dict[str, str]] = None, process_wide: bool = False) -> UserMetrics.DelegatingCounter: """Obtains or creates a Counter metric. 
@@ -82,14 +80,14 @@ def counter( class MetricLogger(object): """Simple object to locally aggregate and log metrics.""" def __init__(self) -> None: - self._metric: Dict[MetricName, 'MetricCell'] = {} + self._metric: dict[MetricName, 'MetricCell'] = {} self._lock = threading.Lock() self._last_logging_millis = int(time.time() * 1000) self.minimum_logging_frequency_msec = 180000 def update( self, - cell_type: Union[Type['MetricCell'], 'MetricCellFactory'], + cell_type: Union[type['MetricCell'], 'MetricCellFactory'], metric_name: MetricName, value: object) -> None: cell = self._get_metric_cell(cell_type, metric_name) @@ -97,7 +95,7 @@ def update( def _get_metric_cell( self, - cell_type: Union[Type['MetricCell'], 'MetricCellFactory'], + cell_type: Union[type['MetricCell'], 'MetricCellFactory'], metric_name: MetricName) -> 'MetricCell': with self._lock: if metric_name not in self._metric: @@ -139,7 +137,7 @@ class ServiceCallMetric(object): def __init__( self, request_count_urn: str, - base_labels: Optional[Dict[str, str]] = None) -> None: + base_labels: Optional[dict[str, str]] = None) -> None: self.base_labels = base_labels if base_labels else {} self.request_count_urn = request_count_urn diff --git a/sdks/python/apache_beam/internal/util.py b/sdks/python/apache_beam/internal/util.py index cf2b5fdbb6b3..4384e6c74481 100644 --- a/sdks/python/apache_beam/internal/util.py +++ b/sdks/python/apache_beam/internal/util.py @@ -27,11 +27,7 @@ import weakref from multiprocessing.pool import ThreadPool from typing import Any -from typing import Dict from typing import Iterable -from typing import List -from typing import Tuple -from typing import Type from typing import TypeVar from typing import Union @@ -68,9 +64,9 @@ def __hash__(self): def remove_objects_from_args( args: Iterable[Any], - kwargs: Dict[str, Any], - pvalue_class: Union[Type[T], Tuple[Type[T], ...]] -) -> Tuple[List[Any], Dict[str, Any], List[T]]: + kwargs: dict[str, Any], + pvalue_class: Union[type[T], tuple[type[T], ...]] +) -> tuple[list[Any], dict[str, Any], list[T]]: """For internal use only; no backwards-compatibility guarantees. Replaces all objects of a given type in args/kwargs with a placeholder. 
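Two details above go beyond a pure annotation rewrite. First, the snippets_test.py hunks pass builtin generics straight into Beam's type-hint APIs (`with_input_types`, `with_output_types`), so the substitution has to hold at runtime and not just for a static checker. A hedged sketch of that usage (the pipeline contents are illustrative):

    import apache_beam as beam

    with beam.Pipeline() as p:
        _ = (
            p
            | beam.Create(['a', 'bb', 'ccc'])
            # tuple[int, str] is accepted where typing.Tuple[int, str] was used before.
            | beam.Map(lambda x: (len(x), x)).with_output_types(tuple[int, str]))

Second, the dill_pickler.py hunk reduces `patch_save_code` from `sys.version_info >= (3, 10) and dill.__version__ == "0.3.1.1"` to the dill version check alone; that is behavior-preserving only if the SDK's minimum supported Python already satisfies the dropped guard, which appears to be the assumption here.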
diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py index da904bf6fb55..000b824d0ed0 100644 --- a/sdks/python/apache_beam/io/avroio.py +++ b/sdks/python/apache_beam/io/avroio.py @@ -47,8 +47,6 @@ from functools import partial from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Union import fastavro @@ -564,10 +562,10 @@ def close(self, writer): for k, v in AVRO_PRIMITIVES_TO_BEAM_PRIMITIVES.items() } -_AvroSchemaType = Union[str, List, Dict] +_AvroSchemaType = Union[str, list, dict] -def avro_union_type_to_beam_type(union_type: List) -> schema_pb2.FieldType: +def avro_union_type_to_beam_type(union_type: list) -> schema_pb2.FieldType: """convert an avro union type to a beam type if the union type is a nullable, and it is a nullable union of an avro diff --git a/sdks/python/apache_beam/io/avroio_test.py b/sdks/python/apache_beam/io/avroio_test.py index 6669b6fb8abf..80c245ad3db4 100644 --- a/sdks/python/apache_beam/io/avroio_test.py +++ b/sdks/python/apache_beam/io/avroio_test.py @@ -27,7 +27,7 @@ import shutil import tempfile import unittest -from typing import List, Any +from typing import Any import fastavro import hamcrest as hc @@ -87,7 +87,7 @@ class AvroBase(object): - _temp_files: List[str] = [] + _temp_files: list[str] = [] def __init__(self, methodName='runTest'): super().__init__(methodName) diff --git a/sdks/python/apache_beam/io/components/rate_limiter.py b/sdks/python/apache_beam/io/components/rate_limiter.py index 2dc8a5340fdb..4d6bf2f17a8c 100644 --- a/sdks/python/apache_beam/io/components/rate_limiter.py +++ b/sdks/python/apache_beam/io/components/rate_limiter.py @@ -25,8 +25,6 @@ import random import threading import time -from typing import Dict -from typing import List import grpc from envoy_data_plane.envoy.extensions.common.ratelimit.v3 import RateLimitDescriptor @@ -94,7 +92,7 @@ def __init__( self, service_address: str, domain: str, - descriptors: List[Dict[str, str]], + descriptors: list[dict[str, str]], timeout: float = 5.0, block_until_allowed: bool = True, retries: int = 3, diff --git a/sdks/python/apache_beam/io/debezium.py b/sdks/python/apache_beam/io/debezium.py index d1ca02aa68d1..5b5efe86fba9 100644 --- a/sdks/python/apache_beam/io/debezium.py +++ b/sdks/python/apache_beam/io/debezium.py @@ -80,7 +80,6 @@ import json from enum import Enum -from typing import List from typing import NamedTuple from typing import Optional @@ -109,8 +108,8 @@ class DriverClassName(Enum): 'ReadFromDebeziumSchema', [('connector_class', str), ('username', str), ('password', str), ('host', str), ('port', str), ('max_number_of_records', Optional[int]), - ('connection_properties', List[str]), - ('start_offset', Optional[List[str]]), + ('connection_properties', list[str]), + ('start_offset', Optional[list[str]]), ('offset_storage_path', Optional[str])]) diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index 0e967f1beec3..848fc043a4db 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -117,7 +117,7 @@ class CrossLanguageJdbcIOTest(unittest.TestCase): DbData = typing.NamedTuple( 'DbData', - [('container_fn', typing.Any), ('classpath', typing.List[str]), + [('container_fn', typing.Any), ('classpath', list[str]), ('db_string', str), ('connector', str)]) DB_CONTAINER_CLASSPATH_STRING = { 'postgres': DbData( diff 
--git a/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py b/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py index 23178b0ee363..7fc2f4351cc8 100644 --- a/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_kafkaio_it_test.py @@ -81,7 +81,7 @@ def build_write_pipeline(self, pipeline): | 'Generate' >> beam.Create(range(NUM_RECORDS)) # pylint: disable=bad-option-value | 'MakeKV' >> beam.Map( lambda x: (None if self.null_key else b'key', str(x).encode())). - with_output_types(typing.Tuple[typing.Optional[bytes], bytes]) + with_output_types(tuple[typing.Optional[bytes], bytes]) | 'WriteToKafka' >> WriteToKafka( producer_config={'bootstrap.servers': self.bootstrap_servers}, topic=self.topic, diff --git a/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py b/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py index 50703144d109..34447748c276 100644 --- a/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py +++ b/sdks/python/apache_beam/io/external/xlang_kafkaio_perf_test.py @@ -17,7 +17,6 @@ import logging import sys -import typing import apache_beam as beam from apache_beam.io import iobase @@ -78,7 +77,7 @@ def test(self): self.pipeline | 'Generate records' >> iobase.Read( SyntheticSource(self.parse_synthetic_source_options())) \ - .with_output_types(typing.Tuple[bytes, bytes]) + .with_output_types(tuple[bytes, bytes]) | 'Count records' >> beam.ParDo(CountMessages(self.metrics_namespace)) | 'Avoid Fusion' >> Reshuffle() | 'Measure time' >> beam.ParDo(MeasureTime(self.metrics_namespace)) diff --git a/sdks/python/apache_beam/io/filebasedio_perf_test.py b/sdks/python/apache_beam/io/filebasedio_perf_test.py index 78a390d9bed4..75ed67313a41 100644 --- a/sdks/python/apache_beam/io/filebasedio_perf_test.py +++ b/sdks/python/apache_beam/io/filebasedio_perf_test.py @@ -20,7 +20,6 @@ import logging import sys import uuid -from typing import Tuple import apache_beam as beam from apache_beam import typehints @@ -69,7 +68,7 @@ def _add_argparse_args(cls, parser): @typehints.with_output_types(bytes) -@typehints.with_input_types(Tuple[bytes, bytes]) +@typehints.with_input_types(tuple[bytes, bytes]) class SyntheticRecordToStrFn(beam.DoFn): """ A DoFn that converts key-value bytes from a synthetic source to a string record.
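The debezium.py hunk above applies the same substitution inside a functional `typing.NamedTuple` declaration, where field types are evaluated at runtime rather than merely inspected by a checker. A minimal sketch of that form, assuming only the standard library (this `Config` schema is illustrative, not the ReadFromDebeziumSchema from the diff):

    import typing

    Config = typing.NamedTuple(
        'Config',
        [('host', str), ('connection_properties', list[str]),
         ('start_offset', typing.Optional[list[str]])])

    c = Config(host='db1', connection_properties=['ssl=true'], start_offset=None)
    assert c.connection_properties == ['ssl=true']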
diff --git a/sdks/python/apache_beam/io/filebasedsource.py b/sdks/python/apache_beam/io/filebasedsource.py index b80e4fb8a841..a766f0554ea2 100644 --- a/sdks/python/apache_beam/io/filebasedsource.py +++ b/sdks/python/apache_beam/io/filebasedsource.py @@ -30,7 +30,6 @@ from typing import Callable from typing import Iterable -from typing import Tuple from typing import Union from apache_beam.internal import pickler @@ -348,7 +347,7 @@ def __init__( self._size_track = None def process(self, element: Union[str, FileMetadata], *args, - **kwargs) -> Iterable[Tuple[FileMetadata, OffsetRange]]: + **kwargs) -> Iterable[tuple[FileMetadata, OffsetRange]]: if isinstance(element, FileMetadata): metadata_list = [element] else: diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py index 3251e567763f..7c8350743267 100644 --- a/sdks/python/apache_beam/io/fileio.py +++ b/sdks/python/apache_beam/io/fileio.py @@ -97,11 +97,7 @@ from typing import Any from typing import BinaryIO # pylint: disable=unused-import from typing import Callable -from typing import DefaultDict -from typing import Dict from typing import Iterable -from typing import List -from typing import Tuple from typing import Union import apache_beam as beam @@ -164,7 +160,7 @@ class _MatchAllFn(beam.DoFn): def __init__(self, empty_match_treatment): self._empty_match_treatment = empty_match_treatment - def process(self, file_pattern: str) -> List[filesystem.FileMetadata]: + def process(self, file_pattern: str) -> list[filesystem.FileMetadata]: # TODO: Should we batch the lookups? match_results = filesystems.FileSystems.match([file_pattern]) match_result = match_results[0] @@ -647,7 +643,7 @@ def expand(self, pcoll): def _create_writer( base_path, - writer_key: Tuple[str, IntervalWindow], + writer_key: tuple[str, IntervalWindow], create_metadata_fn: CreateFileMetadataFn, ): try: @@ -800,8 +796,9 @@ def __init__(self, destination: Callable[[Any], str], shards: int): self.shards = shards # We start the shards for a single destination at an arbitrary point. 
- self._shard_counter: DefaultDict[str, int] = collections.defaultdict( - lambda: random.randrange(self.shards)) + self._shard_counter: collections.defaultdict[ + str, + int] = collections.defaultdict(lambda: random.randrange(self.shards)) def _next_shard_for_destination(self, destination): self._shard_counter[destination] = ((self._shard_counter[destination] + 1) % @@ -821,9 +818,9 @@ class _WriteUnshardedRecordsFn(beam.DoFn): SPILLED_RECORDS = 'spilled_records' WRITTEN_FILES = 'written_files' - _writers_and_sinks: Dict[Tuple[str, BoundedWindow], Tuple[BinaryIO, + _writers_and_sinks: dict[tuple[str, BoundedWindow], tuple[BinaryIO, FileSink]] = None - _file_names: Dict[Tuple[str, BoundedWindow], str] = None + _file_names: dict[tuple[str, BoundedWindow], str] = None def __init__( self, @@ -903,7 +900,7 @@ class _RemoveDuplicates(beam.DoFn): def process( self, - element: Tuple[str, filesystem.FileMetadata], + element: tuple[str, filesystem.FileMetadata], count_state=beam.DoFn.StateParam(COUNT_STATE) ) -> Iterable[filesystem.FileMetadata]: @@ -927,7 +924,7 @@ class _RemoveOldDuplicates(beam.DoFn): def process( self, - element: Tuple[str, filesystem.FileMetadata], + element: tuple[str, filesystem.FileMetadata], time_state=beam.DoFn.StateParam(TIME_STATE) ) -> Iterable[filesystem.FileMetadata]: path = element[0] diff --git a/sdks/python/apache_beam/io/filesystem.py b/sdks/python/apache_beam/io/filesystem.py index e67337cf65ee..bfddb42dd912 100644 --- a/sdks/python/apache_beam/io/filesystem.py +++ b/sdks/python/apache_beam/io/filesystem.py @@ -36,9 +36,7 @@ import zlib from typing import BinaryIO # pylint: disable=unused-import from typing import Iterator -from typing import List from typing import Optional -from typing import Tuple import zstandard @@ -486,7 +484,7 @@ class MatchResult(object): """Result from the ``FileSystem`` match operation which contains the list of matched ``FileMetadata``. """ - def __init__(self, pattern: str, metadata_list: List[FileMetadata]) -> None: + def __init__(self, pattern: str, metadata_list: list[FileMetadata]) -> None: self.metadata_list = metadata_list self.pattern = pattern @@ -553,7 +551,7 @@ def join(self, basepath: str, *paths: str) -> str: raise NotImplementedError @abc.abstractmethod - def split(self, path: str) -> Tuple[str, str]: + def split(self, path: str) -> tuple[str, str]: """Splits the given path into two parts. 
Splits the path into a pair (head, tail) such that tail contains the last @@ -626,7 +624,7 @@ def _url_dirname(self, url_or_path): scheme, path = self._split_scheme(url_or_path) return self._combine_scheme(scheme, posixpath.dirname(path)) - def match_files(self, file_metas: List[FileMetadata], + def match_files(self, file_metas: list[FileMetadata], pattern: str) -> Iterator[FileMetadata]: """Filter :class:`FileMetadata` objects by *pattern* diff --git a/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py b/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py index 91c76b5d54bf..4b1cf6265a30 100644 --- a/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py +++ b/sdks/python/apache_beam/io/flink/flink_streaming_impulse_source.py @@ -24,7 +24,6 @@ import json from typing import Any -from typing import Dict from apache_beam import PTransform from apache_beam import Windowing @@ -35,7 +34,7 @@ class FlinkStreamingImpulseSource(PTransform): URN = "flink:transform:streaming_impulse:v1" - config: Dict[str, Any] = {} + config: dict[str, Any] = {} def expand(self, pbegin): assert isinstance(pbegin, pvalue.PBegin), ( diff --git a/sdks/python/apache_beam/io/gcp/bigquery.py b/sdks/python/apache_beam/io/gcp/bigquery.py index 181c891c1b65..e1bb5583f38b 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery.py +++ b/sdks/python/apache_beam/io/gcp/bigquery.py @@ -366,10 +366,7 @@ def chain_after(result): import uuid import warnings from dataclasses import dataclass -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union import fastavro @@ -638,7 +635,7 @@ def _BigQuerySource(*args, **kwargs): @dataclass class _BigQueryExportResult: coder: beam.coders.Coder - paths: List[str] + paths: list[str] class _CustomBigQuerySource(BoundedSource): @@ -975,12 +972,12 @@ def __init__( dataset: Optional[str] = None, project: Optional[str] = None, query: Optional[str] = None, - selected_fields: Optional[List[str]] = None, + selected_fields: Optional[list[str]] = None, row_restriction: Optional[str] = None, pipeline_options: Optional[GoogleCloudOptions] = None, unique_id: Optional[uuid.UUID] = None, - bigquery_job_labels: Optional[Dict] = None, - bigquery_dataset_labels: Optional[Dict] = None, + bigquery_job_labels: Optional[dict] = None, + bigquery_dataset_labels: Optional[dict] = None, job_name: Optional[str] = None, step_name: Optional[str] = None, use_standard_sql: Optional[bool] = False, @@ -1607,7 +1604,7 @@ def _create_table_if_needed(self, table_reference, schema=None): additional_create_parameters=self.additional_bq_parameters) _KNOWN_TABLES.add(str_table_reference) - def _check_row_size(self, row_and_insert_id) -> Tuple[int, Optional[str]]: + def _check_row_size(self, row_and_insert_id) -> tuple[int, Optional[str]]: """Returns error string when the row estimated size is too big""" row_byte_size = get_deep_size(row_and_insert_id) @@ -2007,7 +2004,7 @@ def __init__( max_insert_payload_size=MAX_INSERT_PAYLOAD_SIZE, num_streaming_keys=DEFAULT_SHARDS_PER_DESTINATION, use_cdc_writes: bool = False, - primary_key: List[str] = None, + primary_key: list[str] = None, expansion_service=None, big_lake_configuration=None): """Initialize a WriteToBigQuery transform. 
@@ -2488,13 +2485,13 @@ class WriteResult: def __init__( self, method: str = None, - destination_load_jobid_pairs: PCollection[Tuple[str, + destination_load_jobid_pairs: PCollection[tuple[str, JobReference]] = None, - destination_file_pairs: PCollection[Tuple[str, Tuple[str, int]]] = None, - destination_copy_jobid_pairs: PCollection[Tuple[str, + destination_file_pairs: PCollection[tuple[str, tuple[str, int]]] = None, + destination_copy_jobid_pairs: PCollection[tuple[str, JobReference]] = None, - failed_rows: PCollection[Tuple[str, dict]] = None, - failed_rows_with_errors: PCollection[Tuple[str, dict, list]] = None): + failed_rows: PCollection[tuple[str, dict]] = None, + failed_rows_with_errors: PCollection[tuple[str, dict, list]] = None): self._method = method self._destination_load_jobid_pairs = destination_load_jobid_pairs @@ -2525,7 +2522,7 @@ def validate(self, valid_methods, attribute): @property def destination_load_jobid_pairs( - self) -> PCollection[Tuple[str, JobReference]]: + self) -> PCollection[tuple[str, JobReference]]: """A ``FILE_LOADS`` method attribute Returns: A PCollection of the table destinations that were successfully @@ -2539,7 +2536,7 @@ def destination_load_jobid_pairs( return self._destination_load_jobid_pairs @property - def destination_file_pairs(self) -> PCollection[Tuple[str, Tuple[str, int]]]: + def destination_file_pairs(self) -> PCollection[tuple[str, tuple[str, int]]]: """A ``FILE_LOADS`` method attribute Returns: A PCollection of the table destinations along with the @@ -2553,7 +2550,7 @@ def destination_file_pairs(self) -> PCollection[Tuple[str, Tuple[str, int]]]: @property def destination_copy_jobid_pairs( - self) -> PCollection[Tuple[str, JobReference]]: + self) -> PCollection[tuple[str, JobReference]]: """A ``FILE_LOADS`` method attribute Returns: A PCollection of the table destinations that were successfully @@ -2567,7 +2564,7 @@ def destination_copy_jobid_pairs( return self._destination_copy_jobid_pairs @property - def failed_rows(self) -> PCollection[Tuple[str, dict]]: + def failed_rows(self) -> PCollection[tuple[str, dict]]: """A ``[STREAMING_INSERTS, STORAGE_WRITE_API]`` method attribute Returns: A PCollection of rows that failed when inserting to BigQuery. 
@@ -2583,7 +2580,7 @@ def failed_rows(self) -> PCollection[Tuple[str, dict]]: return self._failed_rows @property - def failed_rows_with_errors(self) -> PCollection[Tuple[str, dict, list]]: + def failed_rows_with_errors(self) -> PCollection[tuple[str, dict, list]]: """A ``[STREAMING_INSERTS, STORAGE_WRITE_API]`` method attribute Returns: @@ -2642,7 +2639,7 @@ def __init__( with_auto_sharding=False, num_storage_api_streams=0, use_cdc_writes: bool = False, - primary_key: List[str] = None, + primary_key: list[str] = None, big_lake_configuration=None, expansion_service=None): self._table = table @@ -3180,7 +3177,7 @@ def __init__( validate: bool = False, kms_key: str = None, temp_dataset: Union[str, DatasetReference] = None, - bigquery_job_labels: Dict[str, str] = None, + bigquery_job_labels: dict[str, str] = None, query_priority: str = BigQueryQueryPriority.BATCH): if gcs_location: if not isinstance(gcs_location, (str, ValueProvider)): diff --git a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py index b6c177fc7418..ceab52444bb1 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_avro_tools.py @@ -24,7 +24,6 @@ """ from typing import Any -from typing import Dict # BigQuery types as listed in # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types @@ -67,8 +66,8 @@ def get_record_schema_from_dict_table_schema( schema_name: str, - table_schema: Dict[str, Any], - namespace: str = "apache_beam.io.gcp.bigquery") -> Dict[str, Any]: + table_schema: dict[str, Any], + namespace: str = "apache_beam.io.gcp.bigquery") -> dict[str, Any]: # noqa: F821 """Convert a table schema into an Avro schema. @@ -95,8 +94,8 @@ def get_record_schema_from_dict_table_schema( } -def table_field_to_avro_field(table_field: Dict[str, Any], - namespace: str) -> Dict[str, Any]: +def table_field_to_avro_field(table_field: dict[str, Any], + namespace: str) -> dict[str, Any]: # noqa: F821 """Convert a BigQuery field to an avro field. 
diff --git a/sdks/python/apache_beam/io/gcp/bigquery_change_history.py b/sdks/python/apache_beam/io/gcp/bigquery_change_history.py index dad56d26e499..f0a23ddce02a 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_change_history.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_change_history.py @@ -50,11 +50,8 @@ import time import uuid from typing import Any -from typing import Dict from typing import Iterable -from typing import List from typing import Optional -from typing import Tuple import apache_beam as beam from apache_beam.io.gcp import bigquery_tools @@ -155,7 +152,7 @@ class _StreamRestriction: __slots__ = ('stream_names', 'range') def __init__( - self, stream_names: Tuple[str, ...], start: int, stop: int) -> None: + self, stream_names: tuple[str, ...], start: int, stop: int) -> None: self.stream_names = stream_names # tuple of BQ stream name strings self.range = OffsetRange(start, stop) @@ -201,7 +198,7 @@ def try_claim(self, position: int) -> bool: def try_split( self, fraction_of_remainder: float - ) -> Optional[Tuple[_StreamRestriction, _StreamRestriction]]: + ) -> Optional[tuple[_StreamRestriction, _StreamRestriction]]: result = self._offset_tracker.try_split(fraction_of_remainder) if result is not None: primary, residual = result @@ -228,7 +225,7 @@ class _NonSplittableOffsetTracker(OffsetRestrictionTracker): """ def try_split( self, fraction_of_remainder: float - ) -> Optional[Tuple[OffsetRange, OffsetRange]]: + ) -> Optional[tuple[OffsetRange, OffsetRange]]: if fraction_of_remainder == 0: return super().try_split(fraction_of_remainder) return None @@ -250,7 +247,7 @@ class _PollWatermarkEstimator(WatermarkEstimator): State is checkpointed as (watermark_hold, last_end) so both values survive SDF re-dispatch. """ - def __init__(self, state: Tuple[Timestamp, Timestamp]) -> None: + def __init__(self, state: tuple[Timestamp, Timestamp]) -> None: self._watermark_hold, self._last_end = state def observe_timestamp(self, timestamp: Timestamp) -> None: @@ -259,7 +256,7 @@ def observe_timestamp(self, timestamp: Timestamp) -> None: def current_watermark(self) -> Timestamp: return self._watermark_hold - def get_estimator_state(self) -> Tuple[Timestamp, Timestamp]: + def get_estimator_state(self) -> tuple[Timestamp, Timestamp]: return (self._watermark_hold, self._last_end) def set_watermark(self, timestamp: Timestamp) -> None: @@ -293,11 +290,11 @@ class _PollWatermarkEstimatorProvider(WatermarkEstimatorProvider): """ def initial_estimator_state( self, element: _PollConfig, - restriction: OffsetRange) -> Tuple[Timestamp, Timestamp]: + restriction: OffsetRange) -> tuple[Timestamp, Timestamp]: return (element.start_time, element.start_time) def create_watermark_estimator( - self, estimator_state: Tuple[Timestamp, + self, estimator_state: tuple[Timestamp, Timestamp]) -> _PollWatermarkEstimator: return _PollWatermarkEstimator(estimator_state) @@ -309,7 +306,7 @@ def build_changes_query( change_function: str, change_type_column: str = 'change_type', change_timestamp_column: str = 'change_timestamp', - columns: Optional[List[str]] = None, + columns: Optional[list[str]] = None, row_filter: Optional[str] = None) -> str: """Build a CHANGES() or APPENDS() SQL query. @@ -353,7 +350,7 @@ def build_changes_query( def compute_ranges(start: Timestamp, end: Timestamp, - change_function: str) -> List[Tuple[Timestamp, Timestamp]]: + change_function: str) -> list[tuple[Timestamp, Timestamp]]: """Split [start, end) into query-safe chunks. CHANGES() has a max 1-day range. APPENDS() has no limit. 
@@ -571,7 +568,7 @@ def __init__( location: Optional[str], change_type_column: str = 'change_type', change_timestamp_column: str = 'change_timestamp', - columns: Optional[List[str]] = None, + columns: Optional[list[str]] = None, row_filter: Optional[str] = None) -> None: self._table = table self._project = project @@ -735,8 +732,8 @@ def setup(self) -> None: self._ensure_client() def _split_all_streams( - self, stream_names: Tuple[str, ...], - max_split_rounds: int) -> Tuple[str, ...]: + self, stream_names: tuple[str, ...], + max_split_rounds: int) -> tuple[str, ...]: """Split each stream at fraction=0.5 for up to max_split_rounds rounds. Each round attempts to split every stream in the current list. A @@ -952,7 +949,7 @@ def _create_read_session(self, table_ref: 'bigquery.TableReference') -> Any: len(session.streams)) return session - def _read_stream(self, stream_name: str) -> Iterable[Dict[str, Any]]: + def _read_stream(self, stream_name: str) -> Iterable[dict[str, Any]]: """Read all rows from a single Storage API stream as dicts. When batch_arrow_read is enabled, converts entire Arrow RecordBatches @@ -966,7 +963,7 @@ def _read_stream(self, stream_name: str) -> Iterable[Dict[str, Any]]: yield from self._read_stream_row_by_row(stream_name) def _read_stream_row_by_row(self, - stream_name: str) -> Iterable[Dict[str, Any]]: + stream_name: str) -> Iterable[dict[str, Any]]: """Row-by-row Arrow conversion (lower memory than batch mode).""" t0 = time.time() row_count = 0 @@ -980,7 +977,7 @@ def _read_stream_row_by_row(self, elapsed, row_count / elapsed if elapsed > 0 else 0) - def _read_stream_batch(self, stream_name: str) -> Iterable[Dict[str, Any]]: + def _read_stream_batch(self, stream_name: str) -> Iterable[dict[str, Any]]: """Batch-convert Arrow RecordBatches for high throughput.""" schema = None row_count = 0 @@ -1002,7 +999,7 @@ def _read_stream_batch(self, stream_name: str) -> Iterable[Dict[str, Any]]: elapsed, row_count / elapsed if elapsed > 0 else 0) - def _read_stream_raw(self, stream_name: str) -> Iterable[Tuple[bytes, bytes]]: + def _read_stream_raw(self, stream_name: str) -> Iterable[tuple[bytes, bytes]]: """Yield raw (schema_bytes, batch_bytes) without decompression. 
Used when emit_raw_batches is enabled to defer decompression and @@ -1034,7 +1031,7 @@ class _DecompressArrowBatchesFn(beam.DoFn): def __init__(self, change_timestamp_column: str = 'change_timestamp') -> None: self._change_timestamp_column = change_timestamp_column - def process(self, element: Tuple[bytes, bytes]) -> Iterable[Dict[str, Any]]: + def process(self, element: tuple[bytes, bytes]) -> Iterable[dict[str, Any]]: schema_bytes, batch_bytes = element schema = pyarrow.ipc.read_schema(pyarrow.py_buffer(schema_bytes)) batch = pyarrow.ipc.read_record_batch( @@ -1077,7 +1074,7 @@ def setup(self) -> None: def process( self, - element: Tuple[str, Tuple[int, int]], + element: tuple[str, tuple[int, int]], streams_read=beam.DoFn.StateParam(STREAMS_READ) ) -> None: table_key = element[0] @@ -1194,7 +1191,7 @@ def __init__( location: Optional[str] = None, change_type_column: str = 'change_type', change_timestamp_column: str = 'change_timestamp', - columns: Optional[List[str]] = None, + columns: Optional[list[str]] = None, row_filter: Optional[str] = None, batch_arrow_read: bool = True, max_split_rounds: int = 1, @@ -1322,9 +1319,9 @@ def expand(self, pbegin: beam.pvalue.PBegin) -> beam.PCollection: max_split_rounds=self._max_split_rounds, emit_raw_batches=emit_raw)) if emit_raw: - read_sdf = read_sdf.with_output_types(Tuple[bytes, bytes]) + read_sdf = read_sdf.with_output_types(tuple[bytes, bytes]) else: - read_sdf = read_sdf.with_output_types(Dict[str, Any]) + read_sdf = read_sdf.with_output_types(dict[str, Any]) read_outputs = ( query_results diff --git a/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py b/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py index 6432f3b4eeac..136b3cc56b7e 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_read_internal.py @@ -29,9 +29,7 @@ import uuid from typing import TYPE_CHECKING from typing import Any -from typing import Dict from typing import Iterable -from typing import List from typing import Optional from typing import Union @@ -197,7 +195,7 @@ def __init__( gcs_location: Union[str, ValueProvider] = None, validate: bool = False, use_json_exports: bool = False, - bigquery_job_labels: Dict[str, str] = None, + bigquery_job_labels: dict[str, str] = None, step_name: str = None, job_name: str = None, unique_id: str = None, @@ -462,7 +460,7 @@ def decode(self, value): value = json.loads(value.decode('utf-8')) return self._decode_row(value, self.fields) - def _decode_row(self, row: Dict[str, Any], schema_fields: List[FieldSchema]): + def _decode_row(self, row: dict[str, Any], schema_fields: list[FieldSchema]): for field in schema_fields: if field.name not in row: # The field exists in the schema, but it doesn't exist in this row. diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index 1a7a07706a39..a16d10003047 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -40,7 +40,6 @@ from json.decoder import JSONDecodeError from typing import Optional from typing import Sequence -from typing import Tuple from typing import TypeVar from typing import Union @@ -180,7 +179,7 @@ def get_hashable_destination(destination): def to_hashable_table_ref( - table_ref_elem_kv: Tuple[Union[str, TableReference], V]) -> Tuple[str, V]: + table_ref_elem_kv: tuple[Union[str, TableReference], V]) -> tuple[str, V]: """Turns the key of the input tuple to its string representation. 
The key should be either a string or a TableReference. diff --git a/sdks/python/apache_beam/io/gcp/bigtableio.py b/sdks/python/apache_beam/io/gcp/bigtableio.py index f10039e564d1..38b507aaed29 100644 --- a/sdks/python/apache_beam/io/gcp/bigtableio.py +++ b/sdks/python/apache_beam/io/gcp/bigtableio.py @@ -39,8 +39,6 @@ import logging import struct -from typing import Dict -from typing import List import apache_beam as beam from apache_beam.internal.metrics.metric import ServiceCallMetric @@ -262,7 +260,7 @@ def expand(self, input): input | beam.ParDo(self._DirectRowMutationsToBeamRow()).with_output_types( RowTypeConstraint.from_fields( - [("key", bytes), ("mutations", List[Dict[str, bytes]])])) + [("key", bytes), ("mutations", list[dict[str, bytes]])])) | external_write) else: return ( diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py index f66bf2e56405..9c493afe3837 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/helper.py @@ -25,7 +25,6 @@ import os import uuid -from typing import List from typing import Union from cachetools.func import ttl_cache @@ -70,7 +69,7 @@ def retry_on_rpc_error(exception): def create_entities(count, id_or_name=False): """Creates a list of entities with random keys.""" if id_or_name: - ids_or_names: List[Union[str, int]] = [ + ids_or_names: list[Union[str, int]] = [ uuid.uuid4().int & ((1 << 63) - 1) for _ in range(count) ] else: diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py b/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py index f7ce69099ca0..3db3d669c73b 100644 --- a/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py +++ b/sdks/python/apache_beam/io/gcp/datastore/v1new/types.py @@ -23,7 +23,6 @@ import copy from typing import Iterable -from typing import List from typing import Optional from typing import Union @@ -154,7 +153,7 @@ def __repr__(self): class Key(object): def __init__( self, - path_elements: List[Union[str, int]], + path_elements: list[Union[str, int]], parent: Optional['Key'] = None, project: Optional[str] = None, namespace: Optional[str] = None): diff --git a/sdks/python/apache_beam/io/gcp/experimental/spannerio.py b/sdks/python/apache_beam/io/gcp/experimental/spannerio.py index 04800ff015c8..c6f786604090 100644 --- a/sdks/python/apache_beam/io/gcp/experimental/spannerio.py +++ b/sdks/python/apache_beam/io/gcp/experimental/spannerio.py @@ -323,7 +323,7 @@ def snapshot_options(self): @with_input_types(ReadOperation, _SPANNER_TRANSACTION) -@with_output_types(typing.List[typing.Any]) +@with_output_types(list[typing.Any]) class _NaiveSpannerReadDoFn(DoFn): def __init__(self, spanner_configuration): """ @@ -439,7 +439,7 @@ def process(self, element, spanner_transaction): @with_input_types(ReadOperation) -@with_output_types(typing.Dict[typing.Any, typing.Any]) +@with_output_types(dict[typing.Any, typing.Any]) class _CreateReadPartitions(DoFn): """ A DoFn to create partitions. Uses the Partitioning API (PartitionRead / @@ -584,8 +584,8 @@ def create_transaction( exact_staleness))) -@with_input_types(typing.Dict[typing.Any, typing.Any]) -@with_output_types(typing.List[typing.Any]) +@with_input_types(dict[typing.Any, typing.Any]) +@with_output_types(list[typing.Any]) class _ReadFromPartitionFn(DoFn): """ A DoFn to perform reads from the partition. 
diff --git a/sdks/python/apache_beam/io/gcp/pubsub.py b/sdks/python/apache_beam/io/gcp/pubsub.py index 59eadee5538e..276103f52760 100644 --- a/sdks/python/apache_beam/io/gcp/pubsub.py +++ b/sdks/python/apache_beam/io/gcp/pubsub.py @@ -34,10 +34,8 @@ import re from typing import Any -from typing import List from typing import NamedTuple from typing import Optional -from typing import Tuple from typing import Union from apache_beam import coders @@ -467,7 +465,7 @@ def display_data(self): TOPIC_REGEXP = 'projects/([^/]+)/topics/(.+)' -def parse_topic(full_topic: str) -> Tuple[str, str]: +def parse_topic(full_topic: str) -> tuple[str, str]: match = re.match(TOPIC_REGEXP, full_topic) if not match: raise ValueError( @@ -754,7 +752,7 @@ class MultipleReadFromPubSub(PTransform): """ def __init__( self, - pubsub_source_descriptors: List[PubSubSourceDescriptor], + pubsub_source_descriptors: list[PubSubSourceDescriptor], with_attributes: bool = False, ): """Initializes ``PubSubMultipleReader``. diff --git a/sdks/python/apache_beam/io/iobase.py b/sdks/python/apache_beam/io/iobase.py index 67d6cd358a07..afc977406af0 100644 --- a/sdks/python/apache_beam/io/iobase.py +++ b/sdks/python/apache_beam/io/iobase.py @@ -39,7 +39,6 @@ from typing import Any from typing import Iterator from typing import Optional -from typing import Tuple from typing import Union import apache_beam as beam @@ -975,7 +974,7 @@ def display_data(self): def to_runner_api_parameter( self, context: PipelineContext, - ) -> Tuple[str, Any]: + ) -> tuple[str, Any]: from apache_beam.io.gcp.pubsub import _PubSubSource if isinstance(self.source, _PubSubSource): return ( @@ -1120,7 +1119,7 @@ def expand(self, pcoll): def to_runner_api_parameter( self, context: PipelineContext, - ) -> Tuple[str, Any]: + ) -> tuple[str, Any]: # TODO(BEAM-27443): Remove the need for special casing here. # Importing locally to prevent circular dependencies. 
from apache_beam.io.gcp.pubsub import _PubSubSink diff --git a/sdks/python/apache_beam/io/jdbc.py b/sdks/python/apache_beam/io/jdbc.py index 741507634539..20792fe858e2 100644 --- a/sdks/python/apache_beam/io/jdbc.py +++ b/sdks/python/apache_beam/io/jdbc.py @@ -123,7 +123,7 @@ def default_io_expansion_service(classpath=None): 'Config', [('driver_class_name', str), ('jdbc_url', str), ('username', str), ('password', str), ('connection_properties', typing.Optional[str]), - ('connection_init_sqls', typing.Optional[typing.List[str]]), + ('connection_init_sqls', typing.Optional[list[str]]), ('read_query', typing.Optional[str]), ('write_statement', typing.Optional[str]), ('fetch_size', typing.Optional[np.int16]), diff --git a/sdks/python/apache_beam/io/kafka.py b/sdks/python/apache_beam/io/kafka.py index b1847544d395..f7cdd7f7b876 100644 --- a/sdks/python/apache_beam/io/kafka.py +++ b/sdks/python/apache_beam/io/kafka.py @@ -111,9 +111,9 @@ ReadFromKafkaSchema = typing.NamedTuple( 'ReadFromKafkaSchema', - [('consumer_config', typing.Mapping[str, str]), - ('topics', typing.List[str]), ('key_deserializer', str), - ('value_deserializer', str), ('start_read_time', typing.Optional[int]), + [('consumer_config', typing.Mapping[str, str]), ('topics', list[str]), + ('key_deserializer', str), ('value_deserializer', str), + ('start_read_time', typing.Optional[int]), ('max_num_records', typing.Optional[int]), ('max_read_time', typing.Optional[int]), ('commit_offset_in_finalize', bool), ('timestamp_policy', str), diff --git a/sdks/python/apache_beam/io/requestresponse.py b/sdks/python/apache_beam/io/requestresponse.py index e53fa07471af..9fdf33e2299d 100644 --- a/sdks/python/apache_beam/io/requestresponse.py +++ b/sdks/python/apache_beam/io/requestresponse.py @@ -26,12 +26,9 @@ import time from datetime import timedelta from typing import Any -from typing import Dict from typing import Generic -from typing import List from typing import Mapping from typing import Optional -from typing import Tuple from typing import TypeVar from typing import Union @@ -294,7 +291,7 @@ class _FilterCacheReadFn(beam.DoFn): It emits to main output for successful cache read requests or to the tagged output - `cache_misses` - otherwise.""" - def process(self, element: Tuple[RequestT, ResponseT], *args, **kwargs): + def process(self, element: tuple[RequestT, ResponseT], *args, **kwargs): if not element[1]: yield pvalue.TaggedOutput('cache_misses', element[0]) else: @@ -455,7 +452,7 @@ def __init__( *, request_coder: Optional[coders.Coder], response_coder: Optional[coders.Coder], - kwargs: Optional[Dict[str, Any]] = None, + kwargs: Optional[dict[str, Any]] = None, source_caller: Optional[Caller] = None, mode: _RedisMode, ): @@ -538,13 +535,13 @@ def _write_cache(self, element): def __call__(self, element, *args, **kwargs): if self.mode == _RedisMode.READ: - if isinstance(element, List): + if isinstance(element, list): responses = [self._read_cache(e) for e in element] return responses else: return self._read_cache(element) else: - if isinstance(element, List): + if isinstance(element, list): responses = [self._write_cache(e) for e in element] return responses else: @@ -563,7 +560,7 @@ def __init__( port: int, time_to_live: Union[int, timedelta], *, - kwargs: Optional[Dict[str, Any]] = None, + kwargs: Optional[dict[str, Any]] = None, request_coder: Optional[coders.Coder], response_coder: Optional[coders.Coder], source_caller: Optional[Caller[RequestT, ResponseT]] = None, @@ -602,7 +599,7 @@ def expand( return requests | 
RequestResponseIO(self.redis_caller) -class _WriteToRedis(beam.PTransform[beam.PCollection[Tuple[RequestT, +class _WriteToRedis(beam.PTransform[beam.PCollection[tuple[RequestT, ResponseT]], beam.PCollection[ResponseT]]): """A `PTransform` that performs writes to the Redis cache.""" @@ -612,7 +609,7 @@ def __init__( self, port: int, time_to_live: Union[int, timedelta], *, - kwargs: Optional[Dict[str, Any]] = None, + kwargs: Optional[dict[str, Any]] = None, request_coder: Optional[coders.Coder], response_coder: Optional[coders.Coder], source_caller: Optional[Caller[RequestT, ResponseT]] = None, @@ -646,7 +643,7 @@ def __init__( mode=_RedisMode.WRITE) def expand( - self, elements: beam.PCollection[Tuple[RequestT, ResponseT]] + self, elements: beam.PCollection[tuple[RequestT, ResponseT]] ) -> beam.PCollection[ResponseT]: return elements | RequestResponseIO(self.redis_caller) diff --git a/sdks/python/apache_beam/io/requestresponse_it_test.py b/sdks/python/apache_beam/io/requestresponse_it_test.py index 8703653b266e..072a1dc27b9f 100644 --- a/sdks/python/apache_beam/io/requestresponse_it_test.py +++ b/sdks/python/apache_beam/io/requestresponse_it_test.py @@ -21,7 +21,6 @@ import typing import unittest from dataclasses import dataclass -from typing import Tuple from typing import Union import pytest @@ -155,7 +154,7 @@ def setUpClass(cls) -> None: cls.client = EchoHTTPCaller(http_endpoint_address) @classmethod - def _get_client_and_options(cls) -> Tuple[EchoHTTPCaller, EchoITOptions]: + def _get_client_and_options(cls) -> tuple[EchoHTTPCaller, EchoITOptions]: assert cls.options is not None assert cls.client is not None return cls.client, cls.options diff --git a/sdks/python/apache_beam/io/restriction_trackers.py b/sdks/python/apache_beam/io/restriction_trackers.py index 4b819e87a8d6..7f4cf14747fa 100644 --- a/sdks/python/apache_beam/io/restriction_trackers.py +++ b/sdks/python/apache_beam/io/restriction_trackers.py @@ -18,8 +18,6 @@ """`iobase.RestrictionTracker` implementations provided with Apache Beam.""" # pytype: skip-file -from typing import Tuple - from apache_beam.io.iobase import RestrictionProgress from apache_beam.io.iobase import RestrictionTracker from apache_beam.io.range_trackers import OffsetRangeTracker @@ -62,7 +60,7 @@ def split(self, desired_num_offsets_per_split, min_num_offsets_per_split=1): yield OffsetRange(current_split_start, current_split_stop) current_split_start = current_split_stop - def split_at(self, split_pos) -> Tuple['OffsetRange', 'OffsetRange']: + def split_at(self, split_pos) -> tuple['OffsetRange', 'OffsetRange']: return OffsetRange(self.start, split_pos), OffsetRange(split_pos, self.stop) def new_tracker(self): diff --git a/sdks/python/apache_beam/io/textio.py b/sdks/python/apache_beam/io/textio.py index ba28fc608a0c..5b2e6fc47360 100644 --- a/sdks/python/apache_beam/io/textio.py +++ b/sdks/python/apache_beam/io/textio.py @@ -24,7 +24,6 @@ from functools import partial from typing import TYPE_CHECKING from typing import Any -from typing import Dict from typing import Optional from typing import Union @@ -1041,7 +1040,7 @@ def ReadFromJson( *, orient: str = 'records', lines: bool = True, - dtype: Union[bool, Dict[str, Any]] = False, + dtype: Union[bool, dict[str, Any]] = False, **kwargs): """A PTransform for reading json values from files into a PCollection.
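One change in requestresponse.py above is worth calling out separately: `isinstance(element, List)` becomes `isinstance(element, list)`. The deprecated alias happened to work as an `isinstance` target, but the builtin is the idiomatic check, and a parameterized generic is not a legal target at all. A small sketch of the distinction (the values are illustrative):

    element = ['a', 'b']
    assert isinstance(element, list)  # the form the diff adopts

    try:
        isinstance(element, list[str])  # parameterized generics are rejected
    except TypeError:
        print('isinstance() cannot take a parameterized generic')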
diff --git a/sdks/python/apache_beam/metrics/cells.py b/sdks/python/apache_beam/metrics/cells.py index 0eb0e53e1d84..8300d7f063e9 100644 --- a/sdks/python/apache_beam/metrics/cells.py +++ b/sdks/python/apache_beam/metrics/cells.py @@ -23,6 +23,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import copy import logging import threading diff --git a/sdks/python/apache_beam/metrics/execution.py b/sdks/python/apache_beam/metrics/execution.py index ede0975ddb65..e304658e09ab 100644 --- a/sdks/python/apache_beam/metrics/execution.py +++ b/sdks/python/apache_beam/metrics/execution.py @@ -32,6 +32,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import threading from typing import TYPE_CHECKING from typing import Any diff --git a/sdks/python/apache_beam/metrics/metric.py b/sdks/python/apache_beam/metrics/metric.py index bfe023901bd2..b15237cae020 100644 --- a/sdks/python/apache_beam/metrics/metric.py +++ b/sdks/python/apache_beam/metrics/metric.py @@ -30,14 +30,9 @@ import logging import re from typing import TYPE_CHECKING -from typing import Dict -from typing import FrozenSet from typing import Iterable from typing import Iterator -from typing import List from typing import Optional -from typing import Set -from typing import Type from typing import Union from apache_beam.metrics import cells @@ -66,7 +61,7 @@ class Metrics(object): """Lets users create/access metric objects during pipeline execution.""" @staticmethod - def get_namespace(namespace: Union[Type, str]) -> str: + def get_namespace(namespace: Union[type, str]) -> str: if isinstance(namespace, type): return '{}.{}'.format(namespace.__module__, namespace.__name__) elif isinstance(namespace, str): @@ -76,7 +71,7 @@ def get_namespace(namespace: Union[Type, str]) -> str: @staticmethod def counter( - namespace: Union[Type, str], name: str) -> 'Metrics.DelegatingCounter': + namespace: Union[type, str], name: str) -> 'Metrics.DelegatingCounter': """Obtains or creates a Counter metric. Args: @@ -91,7 +86,7 @@ def counter( @staticmethod def distribution( - namespace: Union[Type, str], + namespace: Union[type, str], name: str, process_wide: bool = False) -> 'Metrics.DelegatingDistribution': """Obtains or creates a Distribution metric. @@ -111,7 +106,7 @@ def distribution( @staticmethod def gauge( - namespace: Union[Type, str], + namespace: Union[type, str], name: str, process_wide: bool = False) -> 'Metrics.DelegatingGauge': """Obtains or creates a Gauge metric. @@ -133,7 +128,7 @@ def gauge( @staticmethod def string_set( - namespace: Union[Type, str], name: str) -> 'Metrics.DelegatingStringSet': + namespace: Union[type, str], name: str) -> 'Metrics.DelegatingStringSet': """Obtains or creates a String set metric. String set metrics are restricted to string values. @@ -150,7 +145,7 @@ def string_set( @staticmethod def bounded_trie( - namespace: Union[Type, str], + namespace: Union[type, str], name: str) -> 'Metrics.DelegatingBoundedTrie': """Obtains or creates a Bounded Trie metric. @@ -166,7 +161,7 @@ def bounded_trie( @staticmethod def histogram( - namespace: Union[Type, str], + namespace: Union[type, str], name: str, bucket_type: 'BucketType', logger: Optional['MetricLogger'] = None) -> 'Metrics.DelegatingHistogram': @@ -263,7 +258,7 @@ def _matches_name(filter: 'MetricsFilter', metric_key: 'MetricKey') -> bool: return True @staticmethod - def _is_sub_list(needle: List[str], haystack: List[str]) -> bool: + def _is_sub_list(needle: list[str], haystack: list[str]) -> bool: """True iff `needle` is a sub-list of `haystack` (i.e. 
a contiguous slice of `haystack` exactly matches `needle`""" needle_len = len(needle) @@ -307,7 +302,7 @@ def matches( def query( self, filter: Optional['MetricsFilter'] = None - ) -> Dict[str, List['MetricResult']]: + ) -> dict[str, list['MetricResult']]: """Queries the runner for existing user metrics that match the filter. It should return a dictionary, with lists of each kind of metric, and @@ -338,20 +333,20 @@ class MetricsFilter(object): Note: This class only supports user defined metrics. """ def __init__(self) -> None: - self._names: Set[str] = set() - self._namespaces: Set[str] = set() - self._steps: Set[str] = set() + self._names: set[str] = set() + self._namespaces: set[str] = set() + self._steps: set[str] = set() @property - def steps(self) -> FrozenSet[str]: + def steps(self) -> frozenset[str]: return frozenset(self._steps) @property - def names(self) -> FrozenSet[str]: + def names(self) -> frozenset[str]: return frozenset(self._names) @property - def namespaces(self) -> FrozenSet[str]: + def namespaces(self) -> frozenset[str]: return frozenset(self._namespaces) def with_metric(self, metric: 'Metric') -> 'MetricsFilter': @@ -369,11 +364,11 @@ def with_names(self, names: Iterable[str]) -> 'MetricsFilter': self._names.update(names) return self - def with_namespace(self, namespace: Union[Type, str]) -> 'MetricsFilter': + def with_namespace(self, namespace: Union[type, str]) -> 'MetricsFilter': return self.with_namespaces([namespace]) def with_namespaces( - self, namespaces: Iterable[Union[Type, str]]) -> 'MetricsFilter': + self, namespaces: Iterable[Union[type, str]]) -> 'MetricsFilter': if isinstance(namespaces, str): raise ValueError('Namespaces must be an iterable, not a string') @@ -515,7 +510,7 @@ def add_raw(self, *rollup_segments: str) -> None: @staticmethod def query(results: MetricResults, label: str, - truncated_marker: str = '*') -> Set[str]: + truncated_marker: str = '*') -> set[str]: if not label in Lineage._METRICS: raise ValueError("Label {} does not exist for Lineage", label) response = results.query( diff --git a/sdks/python/apache_beam/metrics/metricbase.py b/sdks/python/apache_beam/metrics/metricbase.py index 9b35bb24f895..addbf14153cf 100644 --- a/sdks/python/apache_beam/metrics/metricbase.py +++ b/sdks/python/apache_beam/metrics/metricbase.py @@ -34,7 +34,6 @@ # pytype: skip-file -from typing import Dict from typing import Optional __all__ = [ @@ -61,7 +60,7 @@ def __init__( namespace: Optional[str], name: Optional[str], urn: Optional[str] = None, - labels: Optional[Dict[str, str]] = None) -> None: + labels: Optional[dict[str, str]] = None) -> None: """Initializes ``MetricName``. Note: namespace and name should be set for user metrics, diff --git a/sdks/python/apache_beam/metrics/monitoring_infos.py b/sdks/python/apache_beam/metrics/monitoring_infos.py index 294bcef039a8..ad24d244993c 100644 --- a/sdks/python/apache_beam/metrics/monitoring_infos.py +++ b/sdks/python/apache_beam/metrics/monitoring_infos.py @@ -20,9 +20,7 @@ import collections import time from functools import reduce -from typing import FrozenSet from typing import Hashable -from typing import List from typing import Union from apache_beam.coders import coder_impl @@ -483,12 +481,12 @@ def get_step_name(monitoring_info_proto): def to_key( - monitoring_info_proto: metrics_pb2.MonitoringInfo) -> FrozenSet[Hashable]: + monitoring_info_proto: metrics_pb2.MonitoringInfo) -> frozenset[Hashable]: """Returns a key based on the URN and labels. 
This is useful in maps to prevent reporting the same MonitoringInfo twice. """ - key_items: List[Hashable] = list(monitoring_info_proto.labels.items()) + key_items: list[Hashable] = list(monitoring_info_proto.labels.items()) key_items.append(monitoring_info_proto.urn) return frozenset(key_items) diff --git a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py index cb33ef60ef2c..baad9330ee06 100644 --- a/sdks/python/apache_beam/ml/gcp/cloud_dlp.py +++ b/sdks/python/apache_beam/ml/gcp/cloud_dlp.py @@ -20,7 +20,6 @@ """ import logging -from typing import List from google.cloud import dlp_v2 @@ -128,7 +127,7 @@ def expand(self, pcoll): @typehints.with_input_types(str) -@typehints.with_output_types(List[dlp_v2.types.dlp.Finding]) +@typehints.with_output_types(list[dlp_v2.types.dlp.Finding]) class InspectForDetails(PTransform): """Inspects input text for sensitive information. the ``PTransform`` returns a ``PCollection`` of diff --git a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py index f46b8d61639b..ad3173b9974b 100644 --- a/sdks/python/apache_beam/ml/gcp/naturallanguageml.py +++ b/sdks/python/apache_beam/ml/gcp/naturallanguageml.py @@ -18,7 +18,6 @@ from typing import Mapping from typing import Optional from typing import Sequence -from typing import Tuple from typing import Union import apache_beam as beam @@ -83,7 +82,7 @@ def AnnotateText( features: Union[Mapping[str, bool], language_v1.AnnotateTextRequest.Features], timeout: Optional[float] = None, - metadata: Optional[Sequence[Tuple[str, str]]] = None): + metadata: Optional[Sequence[tuple[str, str]]] = None): """A :class:`~apache_beam.transforms.ptransform.PTransform` for annotating text using the Google Cloud Natural Language API: https://cloud.google.com/natural-language/docs. @@ -113,7 +112,7 @@ def __init__( features: Union[Mapping[str, bool], language_v1.AnnotateTextRequest.Features], timeout: Optional[float], - metadata: Optional[Sequence[Tuple[str, str]]] = None): + metadata: Optional[Sequence[tuple[str, str]]] = None): self.features = features self.timeout = timeout self.metadata = metadata diff --git a/sdks/python/apache_beam/ml/gcp/recommendations_ai.py b/sdks/python/apache_beam/ml/gcp/recommendations_ai.py index 9730a6b2b1d9..d3f89e5adfbc 100644 --- a/sdks/python/apache_beam/ml/gcp/recommendations_ai.py +++ b/sdks/python/apache_beam/ml/gcp/recommendations_ai.py @@ -22,7 +22,6 @@ from __future__ import absolute_import from typing import Sequence -from typing import Tuple from cachetools.func import ttl_cache from google.api_core.retry import Retry @@ -98,7 +97,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = "default_catalog"): """Initializes a :class:`CreateCatalogItem` transform. 
@@ -144,7 +143,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = None): self._client = None self.retry = retry @@ -201,7 +200,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = "default_catalog"): """Initializes a :class:`ImportCatalogItems` transform @@ -301,7 +300,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = "default_catalog", event_store: str = "default_event_store"): """Initializes a :class:`WriteUserEvent` transform. @@ -400,7 +399,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = "default_catalog", event_store: str = "default_event_store"): """Initializes a :class:`WriteUserEvent` transform. @@ -506,7 +505,7 @@ def __init__( project: str = None, retry: Retry = None, timeout: float = 120, - metadata: Sequence[Tuple[str, str]] = (), + metadata: Sequence[tuple[str, str]] = (), catalog_name: str = "default_catalog", event_store: str = "default_event_store", placement_id: str = None): diff --git a/sdks/python/apache_beam/ml/gcp/videointelligenceml.py b/sdks/python/apache_beam/ml/gcp/videointelligenceml.py index 25fc258b35a1..2e328a326e59 100644 --- a/sdks/python/apache_beam/ml/gcp/videointelligenceml.py +++ b/sdks/python/apache_beam/ml/gcp/videointelligenceml.py @@ -18,7 +18,6 @@ """A connector for sending API requests to the GCP Video Intelligence API.""" from typing import Optional -from typing import Tuple from typing import Union from cachetools.func import ttl_cache @@ -203,7 +202,7 @@ def expand(self, pvalue): @typehints.with_input_types( - Tuple[Union[str, bytes], Optional[videointelligence.VideoContext]]) + tuple[Union[str, bytes], Optional[videointelligence.VideoContext]]) class _VideoAnnotateFnWithContext(_VideoAnnotateFn): """A DoFn that unpacks each input tuple to element, video_context variables and sends these to the GCP Video Intelligence API service and outputs diff --git a/sdks/python/apache_beam/ml/gcp/visionml.py b/sdks/python/apache_beam/ml/gcp/visionml.py index c4ef30710d58..e8b55304ce77 100644 --- a/sdks/python/apache_beam/ml/gcp/visionml.py +++ b/sdks/python/apache_beam/ml/gcp/visionml.py @@ -20,9 +20,7 @@ A connector for sending API requests to the GCP Vision API. 
""" -from typing import List from typing import Optional -from typing import Tuple from typing import Union from cachetools.func import ttl_cache @@ -154,7 +152,7 @@ def expand(self, pvalue): metadata=self.metadata))) @typehints.with_input_types(Union[str, bytes], Optional[vision.ImageContext]) - @typehints.with_output_types(List[vision.AnnotateImageRequest]) + @typehints.with_output_types(list[vision.AnnotateImageRequest]) def _create_image_annotation_pairs(self, element, context_side_input): if context_side_input: # If we have a side input image context, use that image_context = context_side_input.get(element) @@ -249,8 +247,8 @@ def expand(self, pvalue): metadata=self.metadata))) @typehints.with_input_types( - Tuple[Union[str, bytes], Optional[vision.ImageContext]]) - @typehints.with_output_types(List[vision.AnnotateImageRequest]) + tuple[Union[str, bytes], Optional[vision.ImageContext]]) + @typehints.with_output_types(list[vision.AnnotateImageRequest]) def _create_image_annotation_pairs(self, element, **kwargs): element, image_context = element # Unpack (image, image_context) tuple if isinstance(element, str): @@ -267,7 +265,7 @@ def _create_image_annotation_pairs(self, element, **kwargs): yield request -@typehints.with_input_types(List[vision.AnnotateImageRequest]) +@typehints.with_input_types(list[vision.AnnotateImageRequest]) class _ImageAnnotateFn(DoFn): """A DoFn that sends each input element to the GCP Vision API. Returns ``google.cloud.vision.BatchAnnotateImagesResponse``. diff --git a/sdks/python/apache_beam/ml/inference/model_manager.py b/sdks/python/apache_beam/ml/inference/model_manager.py index bae18f492351..9c0d90d7a02f 100644 --- a/sdks/python/apache_beam/ml/inference/model_manager.py +++ b/sdks/python/apache_beam/ml/inference/model_manager.py @@ -37,9 +37,7 @@ from collections import deque from typing import Any from typing import Callable -from typing import Dict from typing import Optional -from typing import Tuple import numpy as np import torch @@ -122,7 +120,7 @@ def reset_peak(self): self._memory_history.append((now, self._current_usage)) self._peak_usage = self._current_usage - def get_stats(self) -> Tuple[float, float, float]: + def get_stats(self) -> tuple[float, float, float]: with self._lock: return self._current_usage, self._peak_usage, self._total_memory @@ -186,7 +184,7 @@ def __init__( self.smoothing_factor = smoothing_factor self.min_data_points = min_data_points self.verbose_logging = verbose_logging - self.estimates: Dict[str, float] = {} + self.estimates: dict[str, float] = {} self.history = defaultdict(lambda: deque(maxlen=20)) self.known_models = set() self._lock = threading.Lock() @@ -214,7 +212,7 @@ def set_initial_estimate(self, model_tag: str, cost: float): self.logging_info("Initial Profile for %s: %s MB", model_tag, cost) def add_observation( - self, active_snapshot: Dict[str, int], peak_memory: float): + self, active_snapshot: dict[str, int], peak_memory: float): if active_snapshot: model_list = "\n".join( f"\t- {model}: {count}" diff --git a/sdks/python/apache_beam/ml/rag/chunking/base.py b/sdks/python/apache_beam/ml/rag/chunking/base.py index 626a6ea8abbe..0286e02a81f6 100644 --- a/sdks/python/apache_beam/ml/rag/chunking/base.py +++ b/sdks/python/apache_beam/ml/rag/chunking/base.py @@ -19,7 +19,6 @@ import functools from collections.abc import Callable from typing import Any -from typing import Dict from typing import Optional import apache_beam as beam @@ -71,7 +70,7 @@ def __init__(self, chunk_id_fn: Optional[ChunkIdFn] = None): 
@abc.abstractmethod def get_splitter_transform( self - ) -> beam.PTransform[beam.PCollection[Dict[str, Any]], + ) -> beam.PTransform[beam.PCollection[dict[str, Any]], beam.PCollection[Chunk]]: """Creates transforms that emits splits for given content.""" raise NotImplementedError( @@ -79,7 +78,7 @@ def get_splitter_transform( def get_ptransform_for_processing( self, **kwargs - ) -> beam.PTransform[beam.PCollection[Dict[str, Any]], + ) -> beam.PTransform[beam.PCollection[dict[str, Any]], beam.PCollection[Chunk]]: """Creates transform for processing documents into chunks.""" ptransform = ( diff --git a/sdks/python/apache_beam/ml/rag/chunking/base_test.py b/sdks/python/apache_beam/ml/rag/chunking/base_test.py index 54e25591c348..36d6428695a4 100644 --- a/sdks/python/apache_beam/ml/rag/chunking/base_test.py +++ b/sdks/python/apache_beam/ml/rag/chunking/base_test.py @@ -18,7 +18,6 @@ import unittest from typing import Any -from typing import Dict from typing import Optional import pytest @@ -54,7 +53,7 @@ def __init__(self, chunk_id_fn: Optional[ChunkIdFn] = None): def get_splitter_transform( self - ) -> beam.PTransform[beam.PCollection[Dict[str, Any]], + ) -> beam.PTransform[beam.PCollection[dict[str, Any]], beam.PCollection[Chunk]]: return beam.ParDo(WordSplitter()) diff --git a/sdks/python/apache_beam/ml/rag/chunking/langchain.py b/sdks/python/apache_beam/ml/rag/chunking/langchain.py index 9e3b6b0c8ef9..c89bc15d2da1 100644 --- a/sdks/python/apache_beam/ml/rag/chunking/langchain.py +++ b/sdks/python/apache_beam/ml/rag/chunking/langchain.py @@ -16,8 +16,6 @@ # from typing import Any -from typing import Dict -from typing import List from typing import Optional import apache_beam as beam @@ -37,7 +35,7 @@ def __init__( self, text_splitter: TextSplitter, document_field: str, - metadata_fields: List[str], + metadata_fields: list[str], chunk_id_fn: Optional[ChunkIdFn] = None): """A ChunkingTransformProvider that uses LangChain text splitters. 
@@ -94,7 +92,7 @@ def __init__( def get_splitter_transform( self - ) -> beam.PTransform[beam.PCollection[Dict[str, Any]], + ) -> beam.PTransform[beam.PCollection[dict[str, Any]], beam.PCollection[Chunk]]: return "Langchain text split" >> beam.ParDo( _LangChainTextSplitter( @@ -108,7 +106,7 @@ def __init__( self, text_splitter: TextSplitter, document_field: str, - metadata_fields: List[str]): + metadata_fields: list[str]): self.text_splitter = text_splitter self.document_field = document_field self.metadata_fields = metadata_fields diff --git a/sdks/python/apache_beam/ml/rag/embeddings/base.py b/sdks/python/apache_beam/ml/rag/embeddings/base.py index 0128d6a6d6fc..1f85f1d89daf 100644 --- a/sdks/python/apache_beam/ml/rag/embeddings/base.py +++ b/sdks/python/apache_beam/ml/rag/embeddings/base.py @@ -22,7 +22,6 @@ """ from collections.abc import Sequence -from typing import List from apache_beam.ml.rag.types import EmbeddableItem from apache_beam.ml.rag.types import Embedding @@ -48,7 +47,7 @@ def create_text_adapter( create_rag_adapter = create_text_adapter -def _extract_text(items: Sequence[EmbeddableItem]) -> List[str]: +def _extract_text(items: Sequence[EmbeddableItem]) -> list[str]: """Extract text from items for embedding.""" texts = [] for item in items: @@ -62,7 +61,7 @@ def _extract_text(items: Sequence[EmbeddableItem]) -> List[str]: def _add_embedding_fn( items: Sequence[EmbeddableItem], - embeddings: Sequence[List[float]]) -> List[EmbeddableItem]: + embeddings: Sequence[list[float]]) -> list[EmbeddableItem]: """Create Embeddings from items and embedding vectors.""" for item, embedding in zip(items, embeddings): item.embedding = Embedding(dense_embedding=embedding) diff --git a/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search.py b/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search.py index e9269af27bd4..220b41598b07 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/bigquery_vector_search.py @@ -20,10 +20,7 @@ from dataclasses import dataclass from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union from google.cloud import bigquery @@ -166,13 +163,13 @@ class BigQueryVectorSearchParameters: project: str table_name: str embedding_column: str - columns: List[str] + columns: list[str] neighbor_count: int metadata_restriction_template: Optional[Union[str, Callable[[EmbeddableItem], str]]] = None distance_type: Optional[str] = None - options: Optional[Dict[str, Any]] = None + options: Optional[dict[str, Any]] = None include_distance: bool = False def _format_restrict(self, item: EmbeddableItem) -> str: @@ -185,7 +182,7 @@ def _format_restrict(self, item: EmbeddableItem) -> str: return self.metadata_restriction_template(item) return self.metadata_restriction_template.format(**item.metadata) - def format_query(self, items: List[EmbeddableItem]) -> str: + def format_query(self, items: list[EmbeddableItem]) -> str: """Format the vector search query template.""" base_columns_str = ", ".join(f"base.{col}" for col in self.columns) columns_str = ", ".join(self.columns) @@ -267,8 +264,8 @@ def format_query(self, items: List[EmbeddableItem]) -> str: class BigQueryVectorSearchEnrichmentHandler( - EnrichmentSourceHandler[Union[EmbeddableItem, List[EmbeddableItem]], - List[Tuple[EmbeddableItem, Dict[str, Any]]]]): + EnrichmentSourceHandler[Union[EmbeddableItem, 
list[EmbeddableItem]], + list[tuple[EmbeddableItem, dict[str, Any]]]]): """Enrichment handler that performs vector similarity search using BigQuery. This handler enriches EmbeddableItems by finding similar vectors in a @@ -348,9 +345,9 @@ def __enter__(self): def __call__( self, - request: Union[EmbeddableItem, List[EmbeddableItem]], + request: Union[EmbeddableItem, list[EmbeddableItem]], *args, - **kwargs) -> List[Tuple[EmbeddableItem, Dict[str, Any]]]: + **kwargs) -> list[tuple[EmbeddableItem, dict[str, Any]]]: """Process request(s) using BigQuery vector search. Args: @@ -389,11 +386,11 @@ def __call__( def __exit__(self, exc_type, exc_val, exc_tb): self.client.close() - def batch_elements_kwargs(self) -> Dict[str, int]: + def batch_elements_kwargs(self) -> dict[str, int]: """Returns kwargs for beam.BatchElements.""" return self._batching_kwargs -def join_fn(left: Embedding, right: Dict[str, Any]) -> Embedding: +def join_fn(left: Embedding, right: dict[str, Any]) -> Embedding: left.metadata['enrichment_data'] = right return left diff --git a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py index 85a63cfba21e..cd310b784703 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search.py @@ -20,10 +20,7 @@ from dataclasses import field from enum import Enum from typing import Any -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union from google.protobuf.json_format import MessageToDict @@ -130,7 +127,7 @@ class BaseSearchParameters: anns_field: str limit: int = 3 filter: str = field(default_factory=str) - search_params: Dict[str, Any] = field(default_factory=dict) + search_params: dict[str, Any] = field(default_factory=dict) consistency_level: Optional[str] = None def __post_init__(self): @@ -156,7 +153,7 @@ class VectorSearchParameters(BaseSearchParameters): Note: For inherited parameters documentation, see BaseSearchParameters. """ - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) @dataclass @@ -174,7 +171,7 @@ class KeywordSearchParameters(BaseSearchParameters): Note: For inherited parameters documentation, see BaseSearchParameters. """ - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) @dataclass @@ -195,7 +192,7 @@ class HybridSearchParameters: keyword: KeywordSearchParameters ranker: MilvusBaseRanker limit: int = 3 - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if not self.vector or not self.keyword: @@ -236,8 +233,8 @@ class MilvusSearchParameters: """ collection_name: str search_strategy: SearchStrategyType - partition_names: List[str] = field(default_factory=list) - output_fields: List[str] = field(default_factory=list) + partition_names: list[str] = field(default_factory=list) + output_fields: list[str] = field(default_factory=list) timeout: Optional[float] = None round_decimal: int = -1 @@ -271,10 +268,10 @@ class MilvusCollectionLoadParameters: parameters. Enables forward compatibility. 
""" refresh: bool = field(default_factory=bool) - resource_groups: List[str] = field(default_factory=list) - load_fields: List[str] = field(default_factory=list) + resource_groups: list[str] = field(default_factory=list) + load_fields: list[str] = field(default_factory=list) skip_load_dynamic_field: bool = field(default_factory=bool) - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) @dataclass @@ -288,13 +285,13 @@ class MilvusSearchResult: fields: List of dictionaries containing additional field values for each entity. Each dictionary corresponds to one returned entity. """ - id: List[Union[str, int]] = field(default_factory=list) - distance: List[float] = field(default_factory=list) - fields: List[Dict[str, Any]] = field(default_factory=list) + id: list[Union[str, int]] = field(default_factory=list) + distance: list[float] = field(default_factory=list) + fields: list[dict[str, Any]] = field(default_factory=list) -InputT, OutputT = (Union[EmbeddableItem, List[EmbeddableItem]], - List[Tuple[EmbeddableItem, Dict[str, Any]]]) +InputT, OutputT = (Union[EmbeddableItem, list[EmbeddableItem]], + list[tuple[EmbeddableItem, dict[str, Any]]]) class MilvusSearchEnrichmentHandler(EnrichmentSourceHandler[InputT, OutputT]): @@ -415,9 +412,9 @@ def connect_and_load(): def __call__( self, - request: Union[EmbeddableItem, List[EmbeddableItem]], + request: Union[EmbeddableItem, list[EmbeddableItem]], *args, - **kwargs) -> List[Tuple[EmbeddableItem, Dict[str, Any]]]: + **kwargs) -> list[tuple[EmbeddableItem, dict[str, Any]]]: reqs = request if isinstance(request, list) else [request] # Early return for empty requests to avoid unnecessary connection attempts if not reqs: @@ -425,7 +422,7 @@ def __call__( search_result = self._search_documents(reqs) return self._get_call_response(reqs, search_result) - def _search_documents(self, embeddable_items: List[EmbeddableItem]): + def _search_documents(self, embeddable_items: list[EmbeddableItem]): if isinstance(self.search_strategy, HybridSearchParameters): data = self._get_hybrid_search_data(embeddable_items) return self._client.hybrid_search( @@ -464,7 +461,7 @@ def _search_documents(self, embeddable_items: List[EmbeddableItem]): raise ValueError( f"Not supported search strategy yet: {self.search_strategy}") - def _get_hybrid_search_data(self, embeddable_items: List[EmbeddableItem]): + def _get_hybrid_search_data(self, embeddable_items: list[EmbeddableItem]): vector_search_data = list( map(self._get_vector_search_data, embeddable_items)) keyword_search_data = list( @@ -507,7 +504,7 @@ def _get_keyword_search_data(self, embeddable_item: EmbeddableItem): def _get_call_response( self, - embeddable_items: List[EmbeddableItem], + embeddable_items: list[EmbeddableItem], search_result: SearchResult[Hits]): response = [] for i in range(len(embeddable_items)): @@ -523,7 +520,7 @@ def _get_call_response( response.append((embeddable_item, result.__dict__)) return response - def _normalize_milvus_fields(self, fields: Dict[str, Any]): + def _normalize_milvus_fields(self, fields: dict[str, Any]): normalized_fields = {} for field, value in fields.items(): value = self._normalize_milvus_value(value) @@ -543,7 +540,7 @@ def _normalize_milvus_value(self, value: Any): return value def convert_sparse_embedding_to_milvus_format( - self, sparse_vector: Tuple[List[int], List[float]]) -> Dict[int, float]: + self, sparse_vector: tuple[list[int], list[float]]) -> dict[int, float]: if not sparse_vector: return None # Converts sparse 
embedding from (indices, values) tuple format to @@ -586,11 +583,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._client.close() self._client = None - def batch_elements_kwargs(self) -> Dict[str, int]: + def batch_elements_kwargs(self) -> dict[str, int]: """Returns kwargs for beam.BatchElements.""" return self._batching_kwargs -def join_fn(left: Embedding, right: Dict[str, Any]) -> Embedding: +def join_fn(left: Embedding, right: dict[str, Any]) -> Embedding: left.metadata['enrichment_data'] = right return left diff --git a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py index 307563868fe8..f37fc4931487 100644 --- a/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py +++ b/sdks/python/apache_beam/ml/rag/enrichment/milvus_search_it_test.py @@ -19,7 +19,6 @@ import unittest from dataclasses import dataclass from dataclasses import field -from typing import Dict import pytest @@ -129,7 +128,7 @@ class MilvusITDataConstruct: tags: list[str] dense_embedding: list[float] sparse_embedding: dict - vocabulary: Dict[str, int] = field(default_factory=dict) + vocabulary: dict[str, int] = field(default_factory=dict) def __getitem__(self, key): return getattr(self, key) diff --git a/sdks/python/apache_beam/ml/rag/ingestion/alloydb.py b/sdks/python/apache_beam/ml/rag/ingestion/alloydb.py index 333c259f9b86..39e501af7810 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/alloydb.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/alloydb.py @@ -16,8 +16,6 @@ from dataclasses import dataclass from typing import Any -from typing import Dict -from typing import List from typing import Optional from apache_beam.ml.rag.ingestion.jdbc_common import ConnectionConfig @@ -64,11 +62,11 @@ class AlloyDBLanguageConnectorConfig: ip_type: str = "PRIVATE" enable_iam_auth: bool = False target_principal: Optional[str] = None - delegates: Optional[List[str]] = None + delegates: Optional[list[str]] = None admin_service_endpoint: Optional[str] = None quota_project: Optional[str] = None - connection_properties: Optional[Dict[str, str]] = None - additional_properties: Optional[Dict[str, Any]] = None + connection_properties: Optional[dict[str, str]] = None + additional_properties: Optional[dict[str, Any]] = None def to_jdbc_url(self) -> str: """Convert options to a properly formatted JDBC URL. 
@@ -115,7 +113,7 @@ def to_connection_config(self): connection_properties=self.connection_properties, additional_jdbc_args=self.additional_jdbc_args()) - def additional_jdbc_args(self) -> Dict[str, List[Any]]: + def additional_jdbc_args(self) -> dict[str, list[Any]]: return { 'classpath': [ "org.postgresql:postgresql:42.2.16", @@ -132,7 +130,7 @@ def __init__( *, # pylint: disable=dangerous-default-value write_config: WriteConfig = WriteConfig(), - column_specs: List[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( + column_specs: list[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( ), conflict_resolution: Optional[ConflictResolution] = ConflictResolution( on_conflict_fields=[], action='IGNORE')): diff --git a/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py b/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py index ce98de19a1de..35f58d0b071a 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/alloydb_it_test.py @@ -21,7 +21,6 @@ import secrets import time import unittest -from typing import List from typing import NamedTuple import psycopg2 @@ -52,7 +51,7 @@ 'CustomSpecsRow', [ ('custom_id', str), # For id_spec test - ('embedding_vec', List[float]), # For embedding_spec test + ('embedding_vec', list[float]), # For embedding_spec test ('content_col', str), # For content_spec test ('metadata', str) ]) @@ -65,7 +64,7 @@ ('source', str), # For metadata_spec and composite key ('timestamp', str), # For metadata_spec and composite key ('content', str), - ('embedding', List[float]), + ('embedding', list[float]), ('metadata', str) ]) registry.register_coder(MetadataConflictRow, RowCoder) diff --git a/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py b/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py index 2a7111c0d35f..20f7febe78f1 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/bigquery.py @@ -17,7 +17,6 @@ import warnings from collections.abc import Callable from typing import Any -from typing import Dict from typing import Optional import apache_beam as beam @@ -27,7 +26,7 @@ from apache_beam.ml.rag.types import EmbeddableItem from apache_beam.typehints.row_type import RowTypeConstraint -EmbeddableToDictFn = Callable[[EmbeddableItem], Dict[str, any]] +EmbeddableToDictFn = Callable[[EmbeddableItem], dict[str, any]] # Backward compatibility alias. ChunkToDictFn = EmbeddableToDictFn @@ -35,7 +34,7 @@ class SchemaConfig: def __init__( self, - schema: Dict, + schema: dict, embeddable_to_dict_fn: Optional[EmbeddableToDictFn] = None, **kwargs): """Configuration for custom BigQuery schema and row conversion. @@ -83,7 +82,7 @@ def __init__( class BigQueryVectorWriterConfig(VectorDatabaseWriteConfig): def __init__( self, - write_config: Dict[str, Any], + write_config: dict[str, Any], *, # Force keyword arguments schema_config: Optional[SchemaConfig] = None): """Configuration for writing vectors to BigQuery using managed transforms. 
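The Milvus enrichment handler earlier in this section converts a sparse embedding from an (indices, values) tuple into the {index: value} mapping Milvus expects. A small self-contained sketch of that conversion (the function name here is illustrative):

from typing import Optional

def to_milvus_sparse(
    sparse_vector: Optional[tuple[list[int], list[float]]]
) -> Optional[dict[int, float]]:
  # Pair indices with values; empty or missing vectors pass through as None.
  if not sparse_vector:
    return None
  indices, values = sparse_vector
  return {int(idx): float(val) for idx, val in zip(indices, values)}

assert to_milvus_sparse(([3, 17], [0.5, 0.25])) == {3: 0.5, 17: 0.25}
assert to_milvus_sparse(None) is None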
diff --git a/sdks/python/apache_beam/ml/rag/ingestion/cloudsql.py b/sdks/python/apache_beam/ml/rag/ingestion/cloudsql.py index 4cd6474ba348..4307ac2d94e4 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/cloudsql.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/cloudsql.py @@ -17,8 +17,6 @@ from dataclasses import asdict from dataclasses import dataclass from typing import Any -from typing import Dict -from typing import List from typing import Optional from apache_beam.ml.rag.ingestion import mysql @@ -61,15 +59,15 @@ class LanguageConnectorConfig: password: str database_name: str instance_name: str - ip_types: Optional[List[str]] = None + ip_types: Optional[list[str]] = None enable_iam_auth: bool = False target_principal: Optional[str] = None - delegates: Optional[List[str]] = None + delegates: Optional[list[str]] = None quota_project: Optional[str] = None - connection_properties: Optional[Dict[str, str]] = None - additional_properties: Optional[Dict[str, Any]] = None + connection_properties: Optional[dict[str, str]] = None + additional_properties: Optional[dict[str, Any]] = None - def _base_jdbc_properties(self) -> Dict[str, Any]: + def _base_jdbc_properties(self) -> dict[str, Any]: properties = {"cloudSqlInstance": self.instance_name} if self.ip_types: @@ -109,7 +107,7 @@ def to_connection_config(self): connection_properties=self.connection_properties, additional_jdbc_args=self.additional_jdbc_args()) - def additional_jdbc_args(self) -> Dict[str, List[Any]]: + def additional_jdbc_args(self) -> dict[str, list[Any]]: return {} @@ -125,7 +123,7 @@ def to_jdbc_url(self) -> str: socketFactory="com.google.cloud.sql.postgres.SocketFactory", database_type="postgresql") - def additional_jdbc_args(self) -> Dict[str, List[Any]]: + def additional_jdbc_args(self) -> dict[str, list[Any]]: return { 'classpath': [ "org.postgresql:postgresql:42.2.16", @@ -146,7 +144,7 @@ def __init__( *, # pylint: disable=dangerous-default-value write_config: WriteConfig = WriteConfig(), - column_specs: List[postgres_common.ColumnSpec] = postgres_common. + column_specs: list[postgres_common.ColumnSpec] = postgres_common. ColumnSpecsBuilder.with_defaults().build(), conflict_resolution: Optional[ postgres_common.ConflictResolution] = postgres_common. @@ -229,7 +227,7 @@ def to_jdbc_url(self) -> str: socketFactory="com.google.cloud.sql.mysql.SocketFactory", database_type="mysql") - def additional_jdbc_args(self) -> Dict[str, List[Any]]: + def additional_jdbc_args(self) -> dict[str, list[Any]]: return { 'classpath': [ "mysql:mysql-connector-java:8.0.22", @@ -250,7 +248,7 @@ def __init__( *, write_config: WriteConfig = WriteConfig(), # pylint: disable=dangerous-default-value - column_specs: List[mysql_common.ColumnSpec] = mysql_common. + column_specs: list[mysql_common.ColumnSpec] = mysql_common. 
ColumnSpecsBuilder.with_defaults().build(), conflict_resolution: Optional[mysql_common.ConflictResolution] = None): self.connector_config = _MySQLConnectorConfig.from_base_config( diff --git a/sdks/python/apache_beam/ml/rag/ingestion/cloudsql_it_test.py b/sdks/python/apache_beam/ml/rag/ingestion/cloudsql_it_test.py index 7ae49ba51823..1d4b988a5db0 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/cloudsql_it_test.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/cloudsql_it_test.py @@ -23,7 +23,6 @@ import unittest from dataclasses import dataclass from typing import Any -from typing import List from typing import Literal from typing import Optional @@ -318,7 +317,7 @@ def build_jdbc_params(helper: DatabaseTestHelper, table_name: str) -> dict: @staticmethod def verify_standard_operations( - pipeline, jdbc_params: dict, expected_chunks: List[Chunk]): + pipeline, jdbc_params: dict, expected_chunks: list[Chunk]): num_records = len(expected_chunks) sample_size = min(500, num_records // 2) diff --git a/sdks/python/apache_beam/ml/rag/ingestion/jdbc_common.py b/sdks/python/apache_beam/ml/rag/ingestion/jdbc_common.py index 586bb7a4aa65..35fd5b35ef3c 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/jdbc_common.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/jdbc_common.py @@ -17,8 +17,6 @@ from dataclasses import dataclass from dataclasses import field from typing import Any -from typing import Dict -from typing import List from typing import Optional @@ -55,9 +53,9 @@ class ConnectionConfig: jdbc_url: str username: str password: str - connection_properties: Optional[Dict[str, str]] = None - connection_init_sqls: Optional[List[str]] = None - additional_jdbc_args: Dict[str, Any] = field(default_factory=dict) + connection_properties: Optional[dict[str, str]] = None + connection_init_sqls: Optional[list[str]] = None + additional_jdbc_args: dict[str, Any] = field(default_factory=dict) @dataclass diff --git a/sdks/python/apache_beam/ml/rag/ingestion/milvus_search.py b/sdks/python/apache_beam/ml/rag/ingestion/milvus_search.py index 7d7c554cc68e..b7cad3796b13 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/milvus_search.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/milvus_search.py @@ -19,8 +19,6 @@ from dataclasses import field from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Optional from pymilvus import MilvusClient @@ -65,7 +63,7 @@ class MilvusWriteConfig: partition_name: str = "" timeout: Optional[float] = None write_config: WriteConfig = field(default_factory=WriteConfig) - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if not self.collection_name: @@ -113,10 +111,10 @@ class MilvusVectorWriterConfig(VectorDatabaseWriteConfig): """ connection_params: MilvusConnectionParameters write_config: MilvusWriteConfig - column_specs: List[ColumnSpec] = field( + column_specs: list[ColumnSpec] = field( default_factory=lambda: MilvusVectorWriterConfig.default_column_specs()) - def create_converter(self) -> Callable[[EmbeddableItem], Dict[str, Any]]: + def create_converter(self) -> Callable[[EmbeddableItem], dict[str, Any]]: """Creates a function to convert EmbeddableItem objects to Milvus records. Returns: @@ -124,7 +122,7 @@ def create_converter(self) -> Callable[[EmbeddableItem], Dict[str, Any]]: dictionary representing a Milvus record with fields mapped according to column_specs. 
""" - def convert(chunk: EmbeddableItem) -> Dict[str, Any]: + def convert(chunk: EmbeddableItem) -> dict[str, Any]: result = {} for col in self.column_specs: result[col.column_name] = col.value_fn(chunk) @@ -143,7 +141,7 @@ def create_write_transform(self) -> beam.PTransform: return _WriteToMilvusVectorDatabase(self) @staticmethod - def default_column_specs() -> List[ColumnSpec]: + def default_column_specs() -> list[ColumnSpec]: """Returns default column specifications for RAG use cases. Creates column mappings for standard RAG fields: id, dense embedding, diff --git a/sdks/python/apache_beam/ml/rag/ingestion/mysql.py b/sdks/python/apache_beam/ml/rag/ingestion/mysql.py index 45f33ea2bad5..75117441ccf6 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/mysql.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/mysql.py @@ -19,7 +19,6 @@ from abc import ABC from abc import abstractmethod from typing import Callable -from typing import List from typing import NamedTuple from typing import Optional @@ -41,23 +40,23 @@ class _ConflictResolutionStrategy(ABC): """Abstract base class for conflict resolution strategies.""" @abstractmethod - def get_conflict_clause(self, all_columns: List[str]) -> str: + def get_conflict_clause(self, all_columns: list[str]) -> str: """Generate the MySQL conflict clause.""" pass class _NoConflictStrategy(_ConflictResolutionStrategy): """Strategy for when no conflict resolution is needed.""" - def get_conflict_clause(self, all_columns: List[str]) -> str: + def get_conflict_clause(self, all_columns: list[str]) -> str: return "" class _UpdateStrategy(_ConflictResolutionStrategy): """Strategy for UPDATE action on conflict.""" - def __init__(self, update_fields: Optional[List[str]] = None): + def __init__(self, update_fields: Optional[list[str]] = None): self.update_fields = update_fields - def get_conflict_clause(self, all_columns: List[str]) -> str: + def get_conflict_clause(self, all_columns: list[str]) -> str: # Use provided fields or default to all columns fields_to_update = self.update_fields or all_columns assert len(fields_to_update) > 0 @@ -71,7 +70,7 @@ class _IgnoreStrategy(_ConflictResolutionStrategy): def __init__(self, primary_key_field: str): self.primary_key_field = primary_key_field - def get_conflict_clause(self, all_columns: List[str]) -> str: + def get_conflict_clause(self, all_columns: list[str]) -> str: return f"ON DUPLICATE KEY UPDATE {self.primary_key_field}"\ f" = {self.primary_key_field}" @@ -94,7 +93,7 @@ def __init__( self, table_name: str, *, - column_specs: List[ColumnSpec], + column_specs: list[ColumnSpec], conflict_resolution: Optional[ConflictResolution] = None): """Builds SQL queries for writing EmbeddableItems with Embeddings to MySQL. """ @@ -150,7 +149,7 @@ def __init__( *, # pylint: disable=dangerous-default-value write_config: WriteConfig = WriteConfig(), - column_specs: List[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( + column_specs: list[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( ), conflict_resolution: Optional[ConflictResolution] = None): """Configuration for writing vectors to MySQL using jdbc. 
diff --git a/sdks/python/apache_beam/ml/rag/ingestion/mysql_common.py b/sdks/python/apache_beam/ml/rag/ingestion/mysql_common.py index 829e95de9f2f..ccda96681347 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/mysql_common.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/mysql_common.py @@ -18,11 +18,8 @@ from dataclasses import dataclass from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Literal from typing import Optional -from typing import Type from apache_beam.ml.rag.types import EmbeddableItem @@ -97,7 +94,7 @@ class ColumnSpec: json: Creates a JSON column specification. """ column_name: str - python_type: Type + python_type: type value_fn: Callable[[EmbeddableItem], Any] placeholder: str = '?' @@ -139,7 +136,7 @@ def json( return cls(column_name, str, value_fn) -def embedding_to_string(embedding: List[float]) -> str: +def embedding_to_string(embedding: list[float]) -> str: """Convert embedding to MySQL vector string format.""" return '[' + ','.join(str(x) for x in embedding) + ']' @@ -147,7 +144,7 @@ def embedding_to_string(embedding: List[float]) -> str: class ColumnSpecsBuilder: """Builder for :class:`.ColumnSpec`'s with chainable methods.""" def __init__(self): - self._specs: List[ColumnSpec] = [] + self._specs: list[ColumnSpec] = [] @staticmethod def with_defaults() -> 'ColumnSpecsBuilder': @@ -159,7 +156,7 @@ def with_defaults() -> 'ColumnSpecsBuilder': def with_id_spec( self, column_name: str = "id", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None) -> 'ColumnSpecsBuilder': """Add ID :class:`.ColumnSpec` with optional type and conversion. @@ -193,7 +190,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_content_spec( self, column_name: str = "content", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None) -> 'ColumnSpecsBuilder': """Add content :class:`.ColumnSpec` with optional type and conversion. @@ -227,8 +224,8 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_metadata_spec( self, column_name: str = "metadata", - python_type: Type = str, - convert_fn: Optional[Callable[[Dict[str, Any]], Any]] = None + python_type: type = str, + convert_fn: Optional[Callable[[dict[str, Any]], Any]] = None ) -> 'ColumnSpecsBuilder': """Add metadata :class:`.ColumnSpec` with optional type and conversion. @@ -263,7 +260,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_embedding_spec( self, column_name: str = "embedding", - convert_fn: Callable[[List[float]], Any] = embedding_to_string + convert_fn: Callable[[list[float]], Any] = embedding_to_string ) -> 'ColumnSpecsBuilder': """Add embedding :class:`.ColumnSpec` with optional conversion. @@ -295,7 +292,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def add_metadata_field( self, field: str, - python_type: Type, + python_type: type, column_name: Optional[str] = None, convert_fn: Optional[Callable[[Any], Any]] = None, default: Any = None) -> 'ColumnSpecsBuilder': @@ -380,7 +377,7 @@ def add_custom_column_spec(self, spec: ColumnSpec) -> 'ColumnSpecsBuilder': self._specs.append(spec) return self - def build(self) -> List[ColumnSpec]: + def build(self) -> list[ColumnSpec]: """Build the final list of column specifications.""" return self._specs.copy() @@ -427,7 +424,7 @@ class ConflictResolution: ... 
) """ action: Literal["UPDATE", "IGNORE"] = "UPDATE" - update_fields: Optional[List[str]] = None + update_fields: Optional[list[str]] = None primary_key_field: Optional[str] = None def __post_init__(self): diff --git a/sdks/python/apache_beam/ml/rag/ingestion/postgres.py b/sdks/python/apache_beam/ml/rag/ingestion/postgres.py index b01e450e9bec..3d41b0b11b11 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/postgres.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/postgres.py @@ -16,8 +16,6 @@ import logging from typing import Callable -from typing import Dict -from typing import List from typing import NamedTuple from typing import Optional from typing import Union @@ -36,7 +34,7 @@ _LOGGER = logging.getLogger(__name__) -MetadataSpec = Union[ColumnSpec, Dict[str, ColumnSpec]] +MetadataSpec = Union[ColumnSpec, dict[str, ColumnSpec]] class _PostgresQueryBuilder: @@ -44,7 +42,7 @@ def __init__( self, table_name: str, *, - column_specs: List[ColumnSpec], + column_specs: list[ColumnSpec], conflict_resolution: Optional[ConflictResolution] = None): """Builds SQL queries for writing EmbeddableItems to Postgres. """ @@ -111,7 +109,7 @@ def __init__( *, # pylint: disable=dangerous-default-value write_config: WriteConfig = WriteConfig(), - column_specs: List[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( + column_specs: list[ColumnSpec] = ColumnSpecsBuilder.with_defaults().build( ), conflict_resolution: Optional[ConflictResolution] = ConflictResolution( on_conflict_fields=[], action='IGNORE')): diff --git a/sdks/python/apache_beam/ml/rag/ingestion/postgres_common.py b/sdks/python/apache_beam/ml/rag/ingestion/postgres_common.py index 4aa08fc7c494..2456f78eba9b 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/postgres_common.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/postgres_common.py @@ -18,12 +18,8 @@ from dataclasses import dataclass from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Literal from typing import Optional -from typing import Tuple -from typing import Type from typing import Union from apache_beam.ml.rag.types import EmbeddableItem @@ -108,7 +104,7 @@ class ColumnSpec: jsonb: Creates a JSONB column specification with jsonb casting. """ column_name: str - python_type: Type + python_type: type value_fn: Callable[[EmbeddableItem], Any] sql_typecast: Optional[str] = None @@ -158,7 +154,7 @@ def jsonb( class ColumnSpecsBuilder: """Builder for :class:`.ColumnSpec`'s with chainable methods.""" def __init__(self): - self._specs: List[ColumnSpec] = [] + self._specs: list[ColumnSpec] = [] @staticmethod def with_defaults() -> 'ColumnSpecsBuilder': @@ -170,7 +166,7 @@ def with_defaults() -> 'ColumnSpecsBuilder': def with_id_spec( self, column_name: str = "id", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None, sql_typecast: Optional[str] = None) -> 'ColumnSpecsBuilder': """Add ID :class:`.ColumnSpec` with optional type and conversion. @@ -207,7 +203,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_content_spec( self, column_name: str = "content", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None, sql_typecast: Optional[str] = None) -> 'ColumnSpecsBuilder': """Add content :class:`.ColumnSpec` with optional type and conversion. 
@@ -244,8 +240,8 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_metadata_spec( self, column_name: str = "metadata", - python_type: Type = str, - convert_fn: Optional[Callable[[Dict[str, Any]], Any]] = None, + python_type: type = str, + convert_fn: Optional[Callable[[dict[str, Any]], Any]] = None, sql_typecast: Optional[str] = "::jsonb") -> 'ColumnSpecsBuilder': """Add metadata :class:`.ColumnSpec` with optional type and conversion. @@ -284,7 +280,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_embedding_spec( self, column_name: str = "embedding", - convert_fn: Optional[Callable[[List[float]], Any]] = None + convert_fn: Optional[Callable[[list[float]], Any]] = None ) -> 'ColumnSpecsBuilder': """Add embedding :class:`.ColumnSpec` with optional conversion. @@ -318,7 +314,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def with_sparse_embedding_spec( self, column_name: str = "sparse_embedding", - conv_fn: Optional[Callable[[Tuple[List[int], List[float]]], Any]] = None + conv_fn: Optional[Callable[[tuple[list[int], list[float]]], Any]] = None ) -> 'ColumnSpecsBuilder': """Add sparse embedding :class:`.ColumnSpec` with optional conversion. @@ -354,7 +350,7 @@ def value_fn(chunk: EmbeddableItem) -> Any: def add_metadata_field( self, field: str, - python_type: Type, + python_type: type, column_name: Optional[str] = None, convert_fn: Optional[Callable[[Any], Any]] = None, default: Any = None, @@ -450,7 +446,7 @@ def add_custom_column_spec(self, spec: ColumnSpec) -> 'ColumnSpecsBuilder': self._specs.append(spec) return self - def build(self) -> List[ColumnSpec]: + def build(self) -> list[ColumnSpec]: """Build the final list of column specifications.""" return self._specs.copy() @@ -491,11 +487,11 @@ class ConflictResolution: ... action="IGNORE" ... 
) """ - on_conflict_fields: Union[str, List[str]] + on_conflict_fields: Union[str, list[str]] action: Literal["UPDATE", "IGNORE"] = "UPDATE" - update_fields: Optional[List[str]] = None + update_fields: Optional[list[str]] = None - def maybe_set_default_update_fields(self, columns: List[str]): + def maybe_set_default_update_fields(self, columns: list[str]): if self.action != "UPDATE": return if self.update_fields is not None: diff --git a/sdks/python/apache_beam/ml/rag/ingestion/postgres_it_test.py b/sdks/python/apache_beam/ml/rag/ingestion/postgres_it_test.py index adbe28b5d086..2da715bbd804 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/postgres_it_test.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/postgres_it_test.py @@ -21,7 +21,6 @@ import secrets import time import unittest -from typing import List from typing import NamedTuple import psycopg2 @@ -50,7 +49,7 @@ 'CustomSpecsRow', [ ('custom_id', str), # For id_spec test - ('embedding_vec', List[float]), # For embedding_spec test + ('embedding_vec', list[float]), # For embedding_spec test ('content_col', str), # For content_spec test ('metadata', str) ]) @@ -63,7 +62,7 @@ ('source', str), # For metadata_spec and composite key ('timestamp', str), # For metadata_spec and composite key ('content', str), - ('embedding', List[float]), + ('embedding', list[float]), ('metadata', str) ]) registry.register_coder(MetadataConflictRow, RowCoder) diff --git a/sdks/python/apache_beam/ml/rag/ingestion/spanner.py b/sdks/python/apache_beam/ml/rag/ingestion/spanner.py index 8e108759721e..c3e29f4a6e33 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/spanner.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/spanner.py @@ -65,11 +65,9 @@ from dataclasses import dataclass from typing import Any from typing import Callable -from typing import List from typing import Literal from typing import NamedTuple from typing import Optional -from typing import Type import apache_beam as beam from apache_beam.coders import registry @@ -108,7 +106,7 @@ class SpannerColumnSpec: ... ) """ column_name: str - python_type: Type + python_type: type value_fn: Callable[[EmbeddableItem], Any] @@ -137,7 +135,7 @@ class SpannerColumnSpecsBuilder: ... ) """ def __init__(self): - self._specs: List[SpannerColumnSpec] = [] + self._specs: list[SpannerColumnSpec] = [] @staticmethod def with_defaults() -> 'SpannerColumnSpecsBuilder': @@ -159,7 +157,7 @@ def with_defaults() -> 'SpannerColumnSpecsBuilder': def with_id_spec( self, column_name: str = "id", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None ) -> 'SpannerColumnSpecsBuilder': """Add ID column specification. @@ -195,7 +193,7 @@ def with_id_spec( def with_embedding_spec( self, column_name: str = "embedding", - convert_fn: Optional[Callable[[List[float]], List[float]]] = None + convert_fn: Optional[Callable[[list[float]], list[float]]] = None ) -> 'SpannerColumnSpecsBuilder': """Add embedding array column (ARRAY or ARRAY). @@ -221,7 +219,7 @@ def with_embedding_spec( ... convert_fn=lambda vec: [round(x, 4) for x in vec] ... 
) """ - def extract_fn(embeddable: EmbeddableItem) -> List[float]: + def extract_fn(embeddable: EmbeddableItem) -> list[float]: if not embeddable.dense_embedding: raise ValueError(f'EmbeddableItem must contain embedding: {embeddable}') return embeddable.dense_embedding @@ -229,7 +227,7 @@ def extract_fn(embeddable: EmbeddableItem) -> List[float]: self._specs.append( SpannerColumnSpec( column_name=column_name, - python_type=List[float], + python_type=list[float], value_fn=functools.partial( _extract_and_convert, extract_fn, convert_fn))) return self @@ -237,7 +235,7 @@ def extract_fn(embeddable: EmbeddableItem) -> List[float]: def with_content_spec( self, column_name: str = "content", - python_type: Type = str, + python_type: type = str, convert_fn: Optional[Callable[[str], Any]] = None ) -> 'SpannerColumnSpecsBuilder': """Add content column. @@ -301,7 +299,7 @@ def with_metadata_spec( def add_metadata_field( self, field: str, - python_type: Type, + python_type: type, column_name: Optional[str] = None, convert_fn: Optional[Callable[[Any], Any]] = None, default: Any = None) -> 'SpannerColumnSpecsBuilder': @@ -369,7 +367,7 @@ def value_fn(embeddable: EmbeddableItem) -> Any: def add_column( self, column_name: str, - python_type: Type, + python_type: type, value_fn: Callable[[EmbeddableItem], Any]) -> 'SpannerColumnSpecsBuilder': """Add a custom column with full control. @@ -402,7 +400,7 @@ def add_column( value_fn=value_fn)) return self - def build(self) -> List[SpannerColumnSpec]: + def build(self) -> list[SpannerColumnSpec]: """Build the final list of column specifications. Returns: @@ -417,7 +415,7 @@ class _SpannerSchemaBuilder: Creates a NamedTuple type from column specifications and registers it with Beam's RowCoder for serialization. """ - def __init__(self, table_name: str, column_specs: List[SpannerColumnSpec]): + def __init__(self, table_name: str, column_specs: list[SpannerColumnSpec]): """Initialize schema builder. Args: @@ -511,7 +509,7 @@ def __init__( table_name: str, *, # Schema configuration - column_specs: Optional[List[SpannerColumnSpec]] = None, + column_specs: Optional[list[SpannerColumnSpec]] = None, # Write operation type write_mode: Literal["INSERT", "UPDATE", "REPLACE", "INSERT_OR_UPDATE"] = "INSERT_OR_UPDATE", diff --git a/sdks/python/apache_beam/ml/rag/ingestion/test_utils.py b/sdks/python/apache_beam/ml/rag/ingestion/test_utils.py index 0373874c09d2..29d9ab1c7e37 100644 --- a/sdks/python/apache_beam/ml/rag/ingestion/test_utils.py +++ b/sdks/python/apache_beam/ml/rag/ingestion/test_utils.py @@ -17,7 +17,6 @@ import hashlib import json -from typing import List import apache_beam as beam from apache_beam.ml.rag.types import Chunk @@ -57,7 +56,7 @@ def get_expected_values( range_start: int, range_end: int, content_prefix: str = "Testval", - seed_multiplier: int = 1) -> List[Chunk]: + seed_multiplier: int = 1) -> list[Chunk]: """Returns a range of test Chunks.""" return [ ChunkTestUtils.from_seed(i, content_prefix, seed_multiplier) diff --git a/sdks/python/apache_beam/ml/rag/test_utils.py b/sdks/python/apache_beam/ml/rag/test_utils.py index 6abe65d560b3..72f7bde5d80e 100644 --- a/sdks/python/apache_beam/ml/rag/test_utils.py +++ b/sdks/python/apache_beam/ml/rag/test_utils.py @@ -23,7 +23,6 @@ import unittest from dataclasses import dataclass from typing import Callable -from typing import List from typing import Optional from typing import cast @@ -244,7 +243,7 @@ def create_client(): exception_types=(MilvusException, )) # Configure schema. 
- field_schemas: List[FieldSchema] = cast(List[FieldSchema], config["fields"]) + field_schemas: list[FieldSchema] = cast(list[FieldSchema], config["fields"]) schema = CollectionSchema( fields=field_schemas, functions=config["functions"]) @@ -345,7 +344,7 @@ def create_user_yaml(service_port: int, max_vector_field_num=5): @staticmethod def assert_chunks_equivalent( - actual_chunks: List[Chunk], expected_chunks: List[Chunk]): + actual_chunks: list[Chunk], expected_chunks: list[Chunk]): """assert_chunks_equivalent checks for presence rather than exact match""" # Sort both lists by ID to ensure consistent ordering. actual_sorted = sorted(actual_chunks, key=lambda c: c.id) diff --git a/sdks/python/apache_beam/ml/rag/types.py b/sdks/python/apache_beam/ml/rag/types.py index 0de93a35306e..0128b1ecd0db 100644 --- a/sdks/python/apache_beam/ml/rag/types.py +++ b/sdks/python/apache_beam/ml/rag/types.py @@ -32,10 +32,7 @@ from dataclasses import dataclass from dataclasses import field from typing import Any -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union @@ -60,8 +57,8 @@ class Embedding: dense_embedding: Dense vector representation. sparse_embedding: Optional sparse vector representation for hybrid search. """ - dense_embedding: Optional[List[float]] = None - sparse_embedding: Optional[Tuple[List[int], List[float]]] = None + dense_embedding: Optional[list[float]] = None + sparse_embedding: Optional[tuple[list[int], list[float]]] = None @dataclass @@ -95,7 +92,7 @@ class EmbeddableItem: content: Content id: str = field(default_factory=lambda: str(uuid.uuid4())) index: int = 0 - metadata: Dict[str, Any] = field(default_factory=dict) + metadata: dict[str, Any] = field(default_factory=dict) embedding: Optional[Embedding] = None @classmethod @@ -105,7 +102,7 @@ def from_text( *, id: Optional[str] = None, index: int = 0, - metadata: Optional[Dict[str, Any]] = None, + metadata: Optional[dict[str, Any]] = None, ) -> 'EmbeddableItem': """Create an EmbeddableItem with text content. @@ -128,7 +125,7 @@ def from_image( image: Union[bytes, str], *, id: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + metadata: Optional[dict[str, Any]] = None, ) -> 'EmbeddableItem': """Create an EmbeddableItem with image content. 
@@ -145,11 +142,11 @@ def from_image( ) @property - def dense_embedding(self) -> Optional[List[float]]: + def dense_embedding(self) -> Optional[list[float]]: return self.embedding.dense_embedding if self.embedding else None @property - def sparse_embedding(self) -> Optional[Tuple[List[int], List[float]]]: + def sparse_embedding(self) -> Optional[tuple[list[int], list[float]]]: return self.embedding.sparse_embedding if self.embedding else None @property diff --git a/sdks/python/apache_beam/ml/rag/utils.py b/sdks/python/apache_beam/ml/rag/utils.py index e2d9962467a1..f56bfe518485 100644 --- a/sdks/python/apache_beam/ml/rag/utils.py +++ b/sdks/python/apache_beam/ml/rag/utils.py @@ -24,11 +24,7 @@ from dataclasses import field from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple -from typing import Type from apache_beam.ml.rag.types import Chunk from apache_beam.ml.rag.types import Content @@ -65,7 +61,7 @@ class MilvusConnectionParameters: db_name: str = "default" token: str = field(default_factory=str) timeout: Optional[float] = None - kwargs: Dict[str, Any] = field(default_factory=dict) + kwargs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if not self.uri: @@ -82,8 +78,8 @@ class MilvusHelpers: """Utility class providing helper methods for Milvus vector db operations.""" @staticmethod def sparse_embedding( - sparse_vector: Optional[Tuple[List[int], List[float]]] - ) -> Optional[Dict[int, float]]: + sparse_vector: Optional[tuple[list[int], list[float]]] + ) -> Optional[dict[int, float]]: if not sparse_vector: return None # Converts sparse embedding from (indices, values) tuple format to @@ -92,7 +88,7 @@ def sparse_embedding( return {int(idx): float(val) for idx, val in zip(indices, values)} -def parse_chunk_strings(chunk_str_list: List[str]) -> List[Chunk]: +def parse_chunk_strings(chunk_str_list: list[str]) -> list[Chunk]: parsed_chunks = [] # Define safe globals and disable built-in functions for safety. @@ -149,7 +145,7 @@ def retry_with_backoff( retry_delay: float = 1.0, retry_backoff_factor: float = 2.0, operation_name: str = "operation", - exception_types: Tuple[Type[BaseException], ...] = (Exception, ) + exception_types: tuple[type[BaseException], ...] = (Exception, ) ) -> Any: """Executes an operation with retry logic and exponential backoff. 
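The `retry_with_backoff` hunk above shows only the tail of its signature; the callable being retried and the retry-count parameter are not visible in this diff. The sketch below is a standalone illustration of the same retry-with-exponential-backoff pattern, written against the modernized `tuple[type[BaseException], ...]` annotation; the `operation` and `max_retries` names are assumptions for illustration, not Beam's actual parameters.

```python
import logging
import time
from typing import Any
from typing import Callable


def retry_with_backoff_sketch(
    operation: Callable[[], Any],  # assumed parameter, not visible above
    max_retries: int = 3,  # assumed parameter, not visible above
    retry_delay: float = 1.0,
    retry_backoff_factor: float = 2.0,
    operation_name: str = "operation",
    exception_types: tuple[type[BaseException], ...] = (Exception, ),
) -> Any:
  """Runs `operation`, retrying the listed exception types with backoff."""
  delay = retry_delay
  for attempt in range(max_retries + 1):
    try:
      return operation()
    except exception_types as e:
      if attempt == max_retries:
        raise
      logging.warning(
          '%s failed (attempt %d/%d): %s; retrying in %.1fs',
          operation_name, attempt + 1, max_retries, e, delay)
      time.sleep(delay)
      delay *= retry_backoff_factor


# Usage, retrying only on ConnectionError:
# retry_with_backoff_sketch(
#     lambda: fetch(),  # `fetch` is a placeholder
#     operation_name='fetch',
#     exception_types=(ConnectionError, ))
```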
diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py index 4031777ce152..f52e9df40566 100644 --- a/sdks/python/apache_beam/ml/transforms/base.py +++ b/sdks/python/apache_beam/ml/transforms/base.py @@ -25,10 +25,8 @@ from collections.abc import Sequence from dataclasses import dataclass from typing import Any -from typing import Dict from typing import Generic from typing import Iterable -from typing import List from typing import Optional from typing import TypeVar from typing import Union @@ -85,9 +83,9 @@ class EmbeddingTypeAdapter(Generic[EmbeddingTypeAdapterInputT, input_fn: Function to extract text for embedding from input type output_fn: Function to create output type from input and embeddings """ - input_fn: Callable[[Sequence[EmbeddingTypeAdapterInputT]], List[str]] + input_fn: Callable[[Sequence[EmbeddingTypeAdapterInputT]], list[str]] output_fn: Callable[[Sequence[EmbeddingTypeAdapterInputT], Sequence[Any]], - List[EmbeddingTypeAdapterOutputT]] + list[EmbeddingTypeAdapterOutputT]] def __reduce__(self): """Custom serialization that preserves type information during @@ -184,8 +182,8 @@ def append_transform(self, transform: BaseOperation): def _dict_input_fn( - columns: Sequence[str], batch: Sequence[Union[Dict[str, Any], - beam.Row]]) -> List[str]: + columns: Sequence[str], batch: Sequence[Union[dict[str, Any], + beam.Row]]) -> list[str]: """Extract text from specified columns in batch.""" if batch and hasattr(batch[0], '_asdict'): batch = [row._asdict() if hasattr(row, '_asdict') else row for row in batch] @@ -222,7 +220,7 @@ def _dict_input_fn( def _dict_output_fn( columns: Sequence[str], - batch: Sequence[Union[Dict[str, Any], beam.Row]], + batch: Sequence[Union[dict[str, Any], beam.Row]], embeddings: Sequence[Any]) -> list[Union[dict[str, Any], beam.Row]]: """Map embeddings back to columns in batch.""" is_beam_row = False @@ -244,15 +242,15 @@ def _dict_output_fn( def _create_dict_adapter( - columns: List[str]) -> EmbeddingTypeAdapter[Dict[str, Any], Dict[str, Any]]: + columns: list[str]) -> EmbeddingTypeAdapter[dict[str, Any], dict[str, Any]]: """Create adapter for dict-based processing.""" - return EmbeddingTypeAdapter[Dict[str, Any], Dict[str, Any]]( + return EmbeddingTypeAdapter[dict[str, Any], dict[str, Any]]( input_fn=cast( - Callable[[Sequence[Dict[str, Any]]], List[str]], + Callable[[Sequence[dict[str, Any]]], list[str]], functools.partial(_dict_input_fn, columns)), output_fn=cast( - Callable[[Sequence[Dict[str, Any]], Sequence[Any]], - List[Dict[str, Any]]], + Callable[[Sequence[dict[str, Any]], Sequence[Any]], + list[dict[str, Any]]], functools.partial(_dict_output_fn, columns))) diff --git a/sdks/python/apache_beam/options/pipeline_options.py b/sdks/python/apache_beam/options/pipeline_options.py index d60d75283eab..a79bddb21ab9 100644 --- a/sdks/python/apache_beam/options/pipeline_options.py +++ b/sdks/python/apache_beam/options/pipeline_options.py @@ -26,11 +26,8 @@ import os from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Optional from typing import Sequence -from typing import Type from typing import TypeVar import apache_beam as beam @@ -488,7 +485,7 @@ def get_all_options( retain_unknown_options=False, display_warnings=False, current_only=False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Returns a dictionary of all defined arguments. Collects all defined arguments into a dictionary.
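Since `get_all_options` (above) and `view_as` (next hunk) are the two `PipelineOptions` entry points whose annotations change here, a short usage sketch of the pair follows; the flag value is made up for illustration:

```python
from typing import Any

from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions

options = PipelineOptions(['--runner=DirectRunner'])

# view_as reinterprets the same underlying options through a subclass.
assert options.view_as(StandardOptions).runner == 'DirectRunner'

# get_all_options collects every defined argument into a plain dict,
# now annotated dict[str, Any] rather than typing.Dict[str, Any].
all_opts: dict[str, Any] = options.get_all_options(drop_default=True)
assert all_opts['runner'] == 'DirectRunner'
```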
@@ -629,7 +626,7 @@ def from_urn(key): def display_data(self): return self.get_all_options(drop_default=True, retain_unknown_options=True) - def view_as(self, cls: Type[PipelineOptionsT]) -> PipelineOptionsT: + def view_as(self, cls: type[PipelineOptionsT]) -> PipelineOptionsT: """Returns a view of the current object as the provided PipelineOptions subclass. Example Usage:: @@ -687,11 +684,11 @@ def is_compat_version_prior_to(self, breaking_change_version): v2_parts = (breaking_change_version.split('.') + ['0', '0', '0'])[:3] return tuple(map(int, v1_parts)) < tuple(map(int, v2_parts)) - def _visible_option_list(self) -> List[str]: + def _visible_option_list(self) -> list[str]: return sorted( option for option in dir(self._visible_options) if option[0] != '_') - def __dir__(self) -> List[str]: + def __dir__(self) -> list[str]: return sorted( dir(type(self)) + list(self.__dict__) + self._visible_option_list()) @@ -853,7 +850,7 @@ def additional_option_ptransform_fn(): # Optional type checks that aren't enabled by default. -additional_type_checks: Dict[str, Callable[[], None]] = { +additional_type_checks: dict[str, Callable[[], None]] = { 'ptransform_fn': additional_option_ptransform_fn, } @@ -2169,7 +2166,7 @@ class OptionsContext(object): Can also be used as a decorator. """ - overrides: List[Dict[str, Any]] = [] + overrides: list[dict[str, Any]] = [] def __init__(self, **options): self.options = options diff --git a/sdks/python/apache_beam/options/value_provider.py b/sdks/python/apache_beam/options/value_provider.py index fa1649beed26..e6cddafc5cb9 100644 --- a/sdks/python/apache_beam/options/value_provider.py +++ b/sdks/python/apache_beam/options/value_provider.py @@ -25,7 +25,6 @@ # pytype: skip-file from functools import wraps -from typing import Set from apache_beam import error @@ -95,7 +94,7 @@ class RuntimeValueProvider(ValueProvider): at graph construction time. """ runtime_options = None - experiments: Set[str] = set() + experiments: set[str] = set() def __init__(self, option_name, value_type, default_value): self.option_name = option_name diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py index 3cce2c5bb773..750868f7443a 100644 --- a/sdks/python/apache_beam/pipeline.py +++ b/sdks/python/apache_beam/pipeline.py @@ -65,7 +65,6 @@ from typing import TYPE_CHECKING from typing import Any from typing import Optional -from typing import Type from typing import Union from google.protobuf import message @@ -644,7 +643,7 @@ def __enter__(self) -> 'Pipeline': def __exit__( self, - exc_type: Optional[Type[BaseException]], + exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional['TracebackType']) -> None: @@ -1021,7 +1020,7 @@ def expand(self, pcoll): if pcoll.element_type is None: pcoll.element_type = typehints.Any - def __reduce__(self) -> tuple[Type, tuple[str, ...]]: + def __reduce__(self) -> tuple[type, tuple[str, ...]]: # Some transforms contain a reference to their enclosing pipeline, # which in turn references all other transforms (resulting in quadratic # time/space to pickle each transform individually).
As we don't diff --git a/sdks/python/apache_beam/pvalue.py b/sdks/python/apache_beam/pvalue.py index 1cd220cc2566..ee885af13367 100644 --- a/sdks/python/apache_beam/pvalue.py +++ b/sdks/python/apache_beam/pvalue.py @@ -30,7 +30,6 @@ import itertools from typing import TYPE_CHECKING from typing import Any -from typing import Dict from typing import Generic from typing import Iterator from typing import NamedTuple @@ -255,7 +254,7 @@ def __init__( # gets applied. self.producer: Optional[AppliedPTransform] = None # Dictionary of PCollections already associated with tags. - self._pcolls: Dict[Optional[str], PCollection] = {} + self._pcolls: dict[Optional[str], PCollection] = {} def __str__(self): return '<%s>' % self._str_internal() diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index 034090cf7bdc..c22072dbf8b9 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -22,6 +22,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import logging import sys import threading diff --git a/sdks/python/apache_beam/runners/dask/dask_runner.py b/sdks/python/apache_beam/runners/dask/dask_runner.py index bc915d300857..b14449ab2fad 100644 --- a/sdks/python/apache_beam/runners/dask/dask_runner.py +++ b/sdks/python/apache_beam/runners/dask/dask_runner.py @@ -59,7 +59,7 @@ def _parse_timeout(candidate): return dask.config.no_default @staticmethod - def _extract_bag_kwargs(dask_options: t.Dict) -> t.Dict: + def _extract_bag_kwargs(dask_options: dict) -> dict: """Parse keyword arguments for `dask.Bag`s; used in graph translation.""" out = {} @@ -174,7 +174,7 @@ def to_dask_bag_visitor(bag_kwargs=None) -> PipelineVisitor: @dataclasses.dataclass class DaskBagVisitor(PipelineVisitor): - bags: t.Dict[AppliedPTransform, db.Bag] = dataclasses.field( + bags: dict[AppliedPTransform, db.Bag] = dataclasses.field( default_factory=collections.OrderedDict) def visit_transform(self, transform_node: AppliedPTransform) -> None: diff --git a/sdks/python/apache_beam/runners/dask/overrides.py b/sdks/python/apache_beam/runners/dask/overrides.py index b952834f12d7..357e51208775 100644 --- a/sdks/python/apache_beam/runners/dask/overrides.py +++ b/sdks/python/apache_beam/runners/dask/overrides.py @@ -32,7 +32,7 @@ @dataclasses.dataclass class _Create(beam.PTransform): - values: t.Tuple[t.Any] + values: tuple[t.Any] def expand(self, input_or_inputs): return beam.pvalue.PCollection.from_(input_or_inputs) @@ -56,8 +56,8 @@ def expand(self, input_or_inputs): return beam.pvalue.PCollection.from_(input_or_inputs) -@typehints.with_input_types(t.Tuple[K, V]) -@typehints.with_output_types(t.Tuple[K, t.Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, t.Iterable[V]]) class _GroupByKeyOnly(beam.PTransform): def expand(self, input_or_inputs): return beam.pvalue.PCollection.from_(input_or_inputs) @@ -70,8 +70,8 @@ def infer_output_type(self, input_type): return typehints.KV[key_type, typehints.Iterable[value_type]] -@typehints.with_input_types(t.Tuple[K, t.Iterable[V]]) -@typehints.with_output_types(t.Tuple[K, t.Iterable[V]]) +@typehints.with_input_types(tuple[K, t.Iterable[V]]) +@typehints.with_output_types(tuple[K, t.Iterable[V]]) class _GroupAlsoByWindow(beam.ParDo): def __init__(self, windowing): super().__init__(_GroupAlsoByWindowDoFn(windowing)) @@ -81,8 +81,8 @@ def expand(self, input_or_inputs): return beam.pvalue.PCollection.from_(input_or_inputs) -@typehints.with_input_types(t.Tuple[K, V]) 
-@typehints.with_output_types(t.Tuple[K, t.Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, t.Iterable[V]]) class _GroupByKey(beam.PTransform): def expand(self, input_or_inputs): return ( @@ -105,7 +105,7 @@ def expand(self, input_or_inputs): return beam.pvalue.PCollection(self.pipeline, is_bounded=is_bounded) -def dask_overrides() -> t.List[PTransformOverride]: +def dask_overrides() -> list[PTransformOverride]: class CreateOverride(PTransformOverride): def matches(self, applied_ptransform: AppliedPTransform) -> bool: return applied_ptransform.transform.__class__ == beam.Create diff --git a/sdks/python/apache_beam/runners/dask/transform_evaluator.py b/sdks/python/apache_beam/runners/dask/transform_evaluator.py index 6fd216fadb53..dbf55a3cee0d 100644 --- a/sdks/python/apache_beam/runners/dask/transform_evaluator.py +++ b/sdks/python/apache_beam/runners/dask/transform_evaluator.py @@ -100,7 +100,7 @@ def __iter__(self): class TaggingReceiver(Receiver): """A Receiver that handles tagged `WindowedValue`s.""" tag: str - values: t.List[PCollVal] + values: list[PCollVal] def receive(self, windowed_value: WindowedValue): if self.tag: @@ -113,7 +113,7 @@ def receive(self, windowed_value: WindowedValue): @dataclasses.dataclass class OneReceiver(dict): """A Receiver that tags values via a dictionary lookup key.""" - values: t.List[PCollVal] = field(default_factory=list) + values: list[PCollVal] = field(default_factory=list) def __missing__(self, key): if key not in self: @@ -135,7 +135,7 @@ class DaskBagOp(abc.ABC): from the pipeline's `DaskOptions`. """ applied: AppliedPTransform - bag_kwargs: t.Dict = dataclasses.field(default_factory=dict) + bag_kwargs: dict = dataclasses.field(default_factory=dict) @property def transform(self): @@ -263,7 +263,7 @@ def value(item): class Flatten(DaskBagOp): """Produces a flattened bag from a collection of bags.""" def apply( - self, input_bag: t.List[db.Bag], side_inputs: OpSide = None) -> db.Bag: + self, input_bag: list[db.Bag], side_inputs: OpSide = None) -> db.Bag: assert isinstance(input_bag, list), 'Must take a sequence of bags!'
return db.concat(input_bag) diff --git a/sdks/python/apache_beam/runners/direct/bundle_factory.py b/sdks/python/apache_beam/runners/direct/bundle_factory.py index 95d8c06111a2..ef87b168d93e 100644 --- a/sdks/python/apache_beam/runners/direct/bundle_factory.py +++ b/sdks/python/apache_beam/runners/direct/bundle_factory.py @@ -21,7 +21,6 @@ from typing import Iterable from typing import Iterator -from typing import List from typing import Union from typing import cast @@ -124,7 +123,7 @@ def __init__( stacked: bool = True) -> None: assert isinstance(pcollection, (pvalue.PBegin, pvalue.PCollection)) self._pcollection = pcollection - self._elements: List[Union[WindowedValue, + self._elements: list[Union[WindowedValue, _Bundle._StackedWindowedValues]] = [] self._stacked = stacked self._committed = False @@ -144,7 +143,7 @@ def get_elements_iterable(self, """ if not self._stacked: # we can safely assume self._elements contains only WindowedValues - elements = cast('List[WindowedValue]', self._elements) + elements = cast('list[WindowedValue]', self._elements) if self._committed and not make_copy: return elements return list(elements) diff --git a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py index 91085274f32a..d4559607ae94 100644 --- a/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py +++ b/sdks/python/apache_beam/runners/direct/consumer_tracking_pipeline_visitor.py @@ -19,9 +19,6 @@ # pytype: skip-file -from typing import Dict -from typing import Set - from apache_beam import pvalue from apache_beam.pipeline import AppliedPTransform from apache_beam.pipeline import PipelineVisitor @@ -37,9 +34,9 @@ class ConsumerTrackingPipelineVisitor(PipelineVisitor): transform has produced and committed output. 
""" def __init__(self): - self.value_to_consumers: Dict[pvalue.PValue, Set[AppliedPTransform]] = {} - self.root_transforms: Set[AppliedPTransform] = set() - self.step_names: Dict[AppliedPTransform, str] = {} + self.value_to_consumers: dict[pvalue.PValue, set[AppliedPTransform]] = {} + self.root_transforms: set[AppliedPTransform] = set() + self.step_names: dict[AppliedPTransform, str] = {} self._num_transforms = 0 self._views = set() diff --git a/sdks/python/apache_beam/runners/direct/direct_runner.py b/sdks/python/apache_beam/runners/direct/direct_runner.py index 73b0321b5de4..443dbd63063d 100644 --- a/sdks/python/apache_beam/runners/direct/direct_runner.py +++ b/sdks/python/apache_beam/runners/direct/direct_runner.py @@ -250,8 +250,8 @@ def visit_transform(self, applied_ptransform): V = typing.TypeVar('V') -@typehints.with_input_types(typing.Tuple[K, V]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class _GroupByKeyOnly(PTransform): """A group by key transform, ignoring windows.""" def infer_output_type(self, input_type): @@ -263,8 +263,8 @@ def expand(self, pcoll): return PCollection.from_(pcoll) -@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, typing.Iterable[V]]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class _GroupAlsoByWindow(ParDo): """The GroupAlsoByWindow transform.""" def __init__(self, windowing): @@ -301,8 +301,8 @@ def process(self, element): return self.driver.process_entire_key(k, vs) -@typehints.with_input_types(typing.Tuple[K, V]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class _StreamingGroupByKeyOnly(_GroupByKeyOnly): """Streaming GroupByKeyOnly placeholder for overriding in DirectRunner.""" urn = "direct_runner:streaming_gbko:v0.1" @@ -318,8 +318,8 @@ def from_runner_api_parameter( return _StreamingGroupByKeyOnly() -@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, typing.Iterable[V]]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class _StreamingGroupAlsoByWindow(_GroupAlsoByWindow): """Streaming GroupAlsoByWindow placeholder for overriding in DirectRunner.""" urn = "direct_runner:streaming_gabw:v0.1" @@ -338,8 +338,8 @@ def from_runner_api_parameter(unused_ptransform, payload, context): context.windowing_strategies.get_by_id(payload.value)) -@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, typing.Iterable[V]]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class _GroupByKey(PTransform): """The DirectRunner GroupByKey implementation.""" def expand(self, pcoll): diff --git a/sdks/python/apache_beam/runners/direct/evaluation_context.py b/sdks/python/apache_beam/runners/direct/evaluation_context.py index 6138577bb91d..a6618f000d29 100644 --- a/sdks/python/apache_beam/runners/direct/evaluation_context.py +++ b/sdks/python/apache_beam/runners/direct/evaluation_context.py @@ -23,12 +23,8 @@ import threading from typing import TYPE_CHECKING from typing import Any -from typing import DefaultDict -from typing import Dict from 
typing import Iterable -from typing import List from typing import Optional -from typing import Tuple from typing import Union from apache_beam import pvalue @@ -91,10 +87,10 @@ class _SideInputsContainer(object): """ def __init__(self, side_inputs: Iterable['pvalue.AsSideInput']) -> None: self._lock = threading.Lock() - self._views: Dict[pvalue.AsSideInput, _SideInputView] = {} - self._transform_to_side_inputs: DefaultDict[ + self._views: dict[pvalue.AsSideInput, _SideInputView] = {} + self._transform_to_side_inputs: collections.defaultdict[ Optional[AppliedPTransform], - List[pvalue.AsSideInput]] = collections.defaultdict(list) + list[pvalue.AsSideInput]] = collections.defaultdict(list) # this appears unused: self._side_input_to_blocked_tasks = collections.defaultdict(list) # type: ignore @@ -139,7 +135,7 @@ def add_values(self, side_input, values): view.elements.extend(values) def update_watermarks_for_transform_and_unblock_tasks( - self, ptransform, watermark) -> List[Tuple[TransformExecutor, Timestamp]]: + self, ptransform, watermark) -> list[tuple[TransformExecutor, Timestamp]]: """Updates _SideInputsContainer after a watermark update and unblocks tasks. It traverses the list of side inputs per PTransform and calls @@ -160,7 +156,7 @@ def update_watermarks_for_transform_and_unblock_tasks( return unblocked_tasks def _update_watermarks_for_side_input_and_unblock_tasks( - self, side_input, watermark) -> List[Tuple[TransformExecutor, Timestamp]]: + self, side_input, watermark) -> list[tuple[TransformExecutor, Timestamp]]: """Helps update _SideInputsContainer after a watermark update. For each view of the side input, it updates the value of the watermark @@ -241,9 +237,9 @@ def __init__( self._value_to_consumers = value_to_consumers self._step_names = step_names self.views = views - self._pcollection_to_views: DefaultDict[ + self._pcollection_to_views: collections.defaultdict[ pvalue.PValue, - List[pvalue.AsSideInput]] = collections.defaultdict(list) + list[pvalue.AsSideInput]] = collections.defaultdict(list) for view in views: self._pcollection_to_views[view.pvalue].append(view) self._transform_keyed_states = self._initialize_keyed_states( @@ -254,7 +250,7 @@ def __init__( root_transforms, value_to_consumers, self._transform_keyed_states) - self._pending_unblocked_tasks: List[Tuple[TransformExecutor, + self._pending_unblocked_tasks: list[tuple[TransformExecutor, Timestamp]] = [] self._counter_factory = counters.CounterFactory() self._metrics = DirectMetrics() @@ -370,7 +366,7 @@ def _commit_bundles( self, uncommitted_bundles: Iterable['_Bundle'], unprocessed_bundles: Iterable['_Bundle'] - ) -> Tuple[Tuple['_Bundle', ...], Tuple['_Bundle', ...]]: + ) -> tuple[tuple['_Bundle', ...], tuple['_Bundle', ...]]: """Commits bundles and returns an immutable set of committed bundles.""" for in_progress_bundle in uncommitted_bundles: producing_applied_ptransform = in_progress_bundle.pcollection.producer @@ -401,7 +397,7 @@ def create_empty_committed_bundle( output_pcollection) def extract_all_timers( - self) -> Tuple[List[Tuple[AppliedPTransform, List['TimerFiring']]], bool]: + self) -> tuple[list[tuple[AppliedPTransform, list['TimerFiring']]], bool]: return self._watermark_manager.extract_all_timers() def is_done(self, transform: Optional[AppliedPTransform] = None) -> bool: diff --git a/sdks/python/apache_beam/runners/direct/executor.py b/sdks/python/apache_beam/runners/direct/executor.py index e8be9d64f993..6bc2b0e4c3f1 100644 --- a/sdks/python/apache_beam/runners/direct/executor.py +++
b/sdks/python/apache_beam/runners/direct/executor.py @@ -27,10 +27,7 @@ import traceback from typing import TYPE_CHECKING from typing import Any -from typing import Dict -from typing import FrozenSet from typing import Optional -from typing import Set from weakref import WeakValueDictionary from apache_beam.metrics.execution import MetricsContainer @@ -145,7 +142,7 @@ def shutdown(self): class _TransformEvaluationState(object): - def __init__(self, executor_service, scheduled: Set['TransformExecutor']): + def __init__(self, executor_service, scheduled: set['TransformExecutor']): self.executor_service = executor_service self.scheduled = scheduled @@ -212,7 +209,7 @@ class _TransformExecutorServices(object): """ def __init__(self, executor_service: _ExecutorService) -> None: self._executor_service = executor_service - self._scheduled: Set[TransformExecutor] = set() + self._scheduled: set[TransformExecutor] = set() self._parallel = _ParallelEvaluationState( self._executor_service, self._scheduled) self._serial_cache: WeakValueDictionary[ @@ -229,7 +226,7 @@ def serial(self, step: Any) -> _SerialEvaluationState: return cached @property - def executors(self) -> FrozenSet['TransformExecutor']: + def executors(self) -> frozenset['TransformExecutor']: return frozenset(self._scheduled) @@ -305,7 +302,7 @@ def __init__( self._applied_ptransform = applied_ptransform self._completion_callback = completion_callback self._transform_evaluation_state = transform_evaluation_state - self._side_input_values: Dict[pvalue.AsSideInput, Any] = {} + self._side_input_values: dict[pvalue.AsSideInput, Any] = {} self.blocked = False self._call_count = 0 self._retry_count = 0 diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py index 49e7d9d02106..2349a0881e40 100644 --- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py +++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py @@ -27,10 +27,6 @@ from collections import abc from typing import TYPE_CHECKING from typing import Any -from typing import Dict -from typing import List -from typing import Tuple -from typing import Type from apache_beam import coders from apache_beam import io @@ -89,13 +85,13 @@ class TransformEvaluatorRegistry(object): Creates instances of TransformEvaluator for the application of a transform. """ - _test_evaluators_overrides: Dict[Type[core.PTransform], - Type['_TransformEvaluator']] = {} + _test_evaluators_overrides: dict[type[core.PTransform], + type['_TransformEvaluator']] = {} def __init__(self, evaluation_context: 'EvaluationContext') -> None: assert evaluation_context self._evaluation_context = evaluation_context - self._evaluators: Dict[Type[core.PTransform], Type[_TransformEvaluator]] = { + self._evaluators: dict[type[core.PTransform], type[_TransformEvaluator]] = { io.Read: _BoundedReadEvaluator, _DirectReadFromPubSub: _PubSubReadEvaluator, core.Flatten: _FlattenEvaluator, @@ -587,7 +583,7 @@ class _PubSubReadEvaluator(_TransformEvaluator): # A mapping of transform to _PubSubSubscriptionWrapper. # TODO(https://github.com/apache/beam/issues/19751): Prevents garbage # collection of pipeline instances. 
- _subscription_cache: Dict[AppliedPTransform, str] = {} + _subscription_cache: dict[AppliedPTransform, str] = {} def __init__( self, @@ -651,7 +647,7 @@ def process_element(self, element): pass def _read_from_pubsub( - self, timestamp_attribute) -> List[Tuple[Timestamp, 'PubsubMessage']]: + self, timestamp_attribute) -> list[tuple[Timestamp, 'PubsubMessage']]: from google.cloud import pubsub from apache_beam.io.gcp.pubsub import PubsubMessage diff --git a/sdks/python/apache_beam/runners/direct/watermark_manager.py b/sdks/python/apache_beam/runners/direct/watermark_manager.py index 666ade6cf82d..90970ce7f669 100644 --- a/sdks/python/apache_beam/runners/direct/watermark_manager.py +++ b/sdks/python/apache_beam/runners/direct/watermark_manager.py @@ -21,11 +21,7 @@ import threading from typing import TYPE_CHECKING -from typing import Dict from typing import Iterable -from typing import List -from typing import Set -from typing import Tuple from apache_beam import pipeline from apache_beam import pvalue @@ -55,7 +51,7 @@ def __init__( self._value_to_consumers = value_to_consumers self._transform_keyed_states = transform_keyed_states # AppliedPTransform -> TransformWatermarks - self._transform_to_watermarks: Dict[AppliedPTransform, + self._transform_to_watermarks: dict[AppliedPTransform, _TransformWatermarks] = {} for root_transform in root_transforms: @@ -179,10 +175,10 @@ def _refresh_watermarks(self, applied_ptransform, side_inputs_container): return unblocked_tasks def extract_all_timers( - self) -> Tuple[List[Tuple[AppliedPTransform, List[TimerFiring]]], bool]: + self) -> tuple[list[tuple[AppliedPTransform, list[TimerFiring]]], bool]: """Extracts fired timers for all transforms and reports if there are any timers set.""" - all_timers: List[Tuple[AppliedPTransform, List[TimerFiring]]] = [] + all_timers: list[tuple[AppliedPTransform, list[TimerFiring]]] = [] has_realtime_timer = False for applied_ptransform, tw in self._transform_to_watermarks.items(): fired_timers, had_realtime_timer = tw.extract_transform_timers() @@ -201,19 +197,19 @@ class _TransformWatermarks(object): def __init__(self, clock, keyed_states, transform): self._clock = clock self._keyed_states = keyed_states - self._input_transform_watermarks: List[_TransformWatermarks] = [] + self._input_transform_watermarks: list[_TransformWatermarks] = [] self._input_watermark = WatermarkManager.WATERMARK_NEG_INF self._output_watermark = WatermarkManager.WATERMARK_NEG_INF self._keyed_earliest_holds = {} # Scheduled bundles targeted for this transform. 
- self._pending: Set['_Bundle'] = set() + self._pending: set['_Bundle'] = set() self._fired_timers = set() self._lock = threading.Lock() self._label = str(transform) def update_input_transform_watermarks( - self, input_transform_watermarks: List['_TransformWatermarks']) -> None: + self, input_transform_watermarks: list['_TransformWatermarks']) -> None: with self._lock: self._input_transform_watermarks = input_transform_watermarks @@ -300,7 +296,7 @@ def refresh(self) -> bool: def synchronized_processing_output_time(self): return self._clock.time() - def extract_transform_timers(self) -> Tuple[List[TimerFiring], bool]: + def extract_transform_timers(self) -> tuple[list[TimerFiring], bool]: """Extracts fired timers and reports whether any timers are set per transform.""" with self._lock: fired_timers = [] diff --git a/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py b/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py index 519bf3514c53..2b64f44755d4 100644 --- a/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py +++ b/sdks/python/apache_beam/runners/interactive/augmented_pipeline.py @@ -22,9 +22,7 @@ # pytype: skip-file import copy -from typing import Dict from typing import Optional -from typing import Set import apache_beam as beam from apache_beam.portability.api import beam_runner_api_pb2 @@ -43,7 +41,7 @@ class AugmentedPipeline: def __init__( self, user_pipeline: beam.Pipeline, - pcolls: Optional[Set[beam.pvalue.PCollection]] = None): + pcolls: Optional[set[beam.pvalue.PCollection]] = None): """ Initializes a pipeline for augmenting the interactive flavor. @@ -77,7 +75,7 @@ def augmented_pipeline(self) -> beam_runner_api_pb2.Pipeline: def background_recording_pipeline(self) -> beam_runner_api_pb2.Pipeline: raise NotImplementedError - def cacheables(self) -> Dict[beam.pvalue.PCollection, Cacheable]: + def cacheables(self) -> dict[beam.pvalue.PCollection, Cacheable]: """Finds all the cacheable intermediate PCollections in the pipeline with their metadata. """ diff --git a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py index ac7ef5cae561..2b4b3bbef1ba 100644 --- a/sdks/python/apache_beam/runners/interactive/caching/read_cache.py +++ b/sdks/python/apache_beam/runners/interactive/caching/read_cache.py @@ -21,8 +21,6 @@ """ # pytype: skip-file -from typing import Tuple - import apache_beam as beam from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.runners.interactive import cache_manager as cache @@ -47,7 +45,7 @@ def __init__( self._cacheable = cacheable self._key = repr(cacheable.to_key()) - def read_cache(self) -> Tuple[str, str]: + def read_cache(self) -> tuple[str, str]: """Reads cache of the cacheable PCollection and wires the cache into the pipeline proto. Returns the pipeline-scoped ids of the cacheable PCollection and the cache reading output PCollection that replaces it.
@@ -118,7 +116,7 @@ def read_cache(self) -> Tuple[str, str]: return source_id, output_id def _build_runner_api_template( - self) -> Tuple[beam_runner_api_pb2.Pipeline, beam.pvalue.PCollection]: + self) -> tuple[beam_runner_api_pb2.Pipeline, beam.pvalue.PCollection]: transform = _ReadCacheTransform(self._cache_manager, self._key) tmp_pipeline = beam.Pipeline() tmp_pipeline.component_id_map = self._context.component_id_map diff --git a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py index e56073b009cb..0e1c58bb342e 100644 --- a/sdks/python/apache_beam/runners/interactive/caching/write_cache.py +++ b/sdks/python/apache_beam/runners/interactive/caching/write_cache.py @@ -21,8 +21,6 @@ """ # pytype: skip-file -from typing import Tuple - import apache_beam as beam from apache_beam.portability.api import beam_runner_api_pb2 from apache_beam.runners.interactive import cache_manager as cache @@ -124,7 +122,7 @@ def write_cache(self) -> None: inputs[key] = input_id def _build_runner_api_template( - self) -> Tuple[beam_runner_api_pb2.Pipeline, '_PCollectionPlaceHolder']: + self) -> tuple[beam_runner_api_pb2.Pipeline, '_PCollectionPlaceHolder']: pph = _PCollectionPlaceHolder(self._cacheable.pcoll, self._context) transform = _WriteCacheTransform(self._cache_manager, self._key) _ = pph.placeholder_pcoll | 'sink_cache_' + self._key >> transform diff --git a/sdks/python/apache_beam/runners/interactive/dataproc/dataproc_cluster_manager.py b/sdks/python/apache_beam/runners/interactive/dataproc/dataproc_cluster_manager.py index f15541d423ac..817d4f6b583b 100644 --- a/sdks/python/apache_beam/runners/interactive/dataproc/dataproc_cluster_manager.py +++ b/sdks/python/apache_beam/runners/interactive/dataproc/dataproc_cluster_manager.py @@ -21,7 +21,6 @@ import re import time from typing import Optional -from typing import Tuple from apache_beam import version as beam_version from apache_beam.options.pipeline_options import PipelineOptions @@ -314,7 +313,7 @@ def get_staging_location(self) -> str: self.cluster_metadata.cluster_name) raise e - def parse_master_url_and_dashboard(self, line: str) -> Tuple[str, str]: + def parse_master_url_and_dashboard(self, line: str) -> tuple[str, str]: """Parses the master_url and YARN application_id of the Flink process from an input line. 
The line containing both the master_url and application id is always formatted as follows: @@ -340,7 +339,7 @@ def parse_master_url_and_dashboard(self, line: str) -> tuple[str, str]: yarn_endpoint) return master_url, dashboard - def get_master_url_and_dashboard(self) -> Tuple[Optional[str], Optional[str]]: + def get_master_url_and_dashboard(self) -> tuple[Optional[str], Optional[str]]: """Returns the master_url and dashboard of the current cluster.""" startup_logs = [] for file in self._fs._list(self._staging_directory): diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py index 10058351938e..e1d66cd10600 100644 --- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py +++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph.py @@ -25,11 +25,7 @@ import collections import logging import threading -from typing import DefaultDict -from typing import Dict from typing import Iterator -from typing import List -from typing import Tuple from typing import Union import apache_beam as beam @@ -93,9 +89,10 @@ def __init__( (beam_runner_api_pb2.Pipeline, beam.Pipeline, type(pipeline))) # A dict from PCollection ID to a list of its consuming Transform IDs - self._consumers: DefaultDict[str, List[str]] = collections.defaultdict(list) + self._consumers: collections.defaultdict[ + str, list[str]] = collections.defaultdict(list) # A dict from PCollection ID to its producing Transform ID - self._producers: Dict[str, str] = {} + self._producers: dict[str, str] = {} for transform_id, transform_proto in self._top_level_transforms(): for pcoll_id in transform_proto.inputs.values(): @@ -132,7 +129,7 @@ def display_graph(self): 'pipeline graph.') def _top_level_transforms( - self) -> Iterator[Tuple[str, beam_runner_api_pb2.PTransform]]: + self) -> Iterator[tuple[str, beam_runner_api_pb2.PTransform]]: """Yields all top level PTransforms (subtransforms of the root PTransform). Yields: (str, PTransform proto) ID, proto pair of top level PTransforms. diff --git a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py index ad46f5d65ea3..0cc0cfb7de92 100644 --- a/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py +++ b/sdks/python/apache_beam/runners/interactive/display/pipeline_graph_renderer.py @@ -27,7 +27,6 @@ import subprocess from typing import TYPE_CHECKING from typing import Optional -from typing import Type from apache_beam.utils.plugin import BeamPlugin @@ -95,7 +94,7 @@ def render_pipeline_graph(self, pipeline_graph: 'PipelineGraph') -> str: return pipeline_graph._get_graph().create_svg().decode("utf-8") # pylint: disable=protected-access -def get_renderer(option: Optional[str] = None) -> Type[PipelineGraphRenderer]: +def get_renderer(option: Optional[str] = None) -> type[PipelineGraphRenderer]: """Get an instance of PipelineGraphRenderer given the rendering option.
Args: diff --git a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py index aebca7b85d65..438a3c4f4e43 100644 --- a/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py +++ b/sdks/python/apache_beam/runners/interactive/extensions/apache-beam-jupyterlab-sidepanel/apache_beam_jupyterlab_sidepanel/yaml_parse_utils.py @@ -14,8 +14,6 @@ import json from dataclasses import dataclass from typing import Any -from typing import Dict -from typing import List from typing import TypedDict import yaml @@ -50,8 +48,8 @@ def __post_init__(self): class FlowGraph(TypedDict): - nodes: List[Dict[str, Any]] - edges: List[Dict[str, Any]] + nodes: list[dict[str, Any]] + edges: list[dict[str, Any]] # ======================== Main Function ======================== @@ -96,8 +94,8 @@ def parse_beam_yaml(yaml_str: str, isDryRunMode: bool = False) -> str: pipeline = parsed_yaml['pipeline'] transforms = pipeline.get('transforms', []) - nodes: List[NodeData] = [] - edges: List[EdgeData] = [] + nodes: list[NodeData] = [] + edges: list[EdgeData] = [] nodes.append(NodeData(id='0', label='Input', type='input')) nodes.append(NodeData(id='1', label='Output', type='output')) @@ -144,7 +142,7 @@ def to_dict(node): def build_success_response( - nodes: List[Dict[str, Any]], edges: List[Dict[str, Any]]) -> str: + nodes: list[dict[str, Any]], edges: list[dict[str, Any]]) -> str: """Build success response""" return json.dumps({'data': {'nodes': nodes, 'edges': edges}, 'error': None}) diff --git a/sdks/python/apache_beam/runners/interactive/options/capture_control.py b/sdks/python/apache_beam/runners/interactive/options/capture_control.py index 826b596bbc6d..fad73e6d15c0 100644 --- a/sdks/python/apache_beam/runners/interactive/options/capture_control.py +++ b/sdks/python/apache_beam/runners/interactive/options/capture_control.py @@ -25,7 +25,6 @@ import logging from datetime import timedelta -from typing import List from apache_beam.io.gcp.pubsub import ReadFromPubSub from apache_beam.runners.interactive import interactive_environment as ie @@ -46,7 +45,7 @@ def __init__(self): self._capture_size_limit = 1e9 self._test_limiters = None - def limiters(self) -> List['capture_limiters.Limiter']: + def limiters(self) -> list['capture_limiters.Limiter']: # noqa: F821 if self._test_limiters: return self._test_limiters @@ -56,7 +55,7 @@ def limiters(self) -> List['capture_limiters.Limiter']: ] def set_limiters_for_test( - self, limiters: List['capture_limiters.Limiter']) -> None: + self, limiters: list['capture_limiters.Limiter']) -> None: # noqa: F821 self._test_limiters = limiters diff --git a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py index 07e35f96877c..ceb53e3eb766 100644 --- a/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py +++ b/sdks/python/apache_beam/runners/interactive/pipeline_instrument.py @@ -24,7 +24,6 @@ # pytype: skip-file import logging -from typing import Dict import apache_beam as beam from apache_beam.pipeline import PipelineVisitor @@ -332,7 +331,7 @@ def set_proto_map(proto_map, new_value): return pipeline_to_execute @property - def cacheables(self) -> Dict[str, Cacheable]: + def 
cacheables(self) -> dict[str, Cacheable]: """Returns the Cacheables by PCollection ids. If you're already working with user-defined pipelines and PCollections, @@ -372,7 +371,7 @@ def runner_pcoll_to_user_pcoll(self): pipeline to instances in the user pipeline.""" return self._runner_pcoll_to_user_pcoll - def find_cacheables(self) -> Dict[str, Cacheable]: + def find_cacheables(self) -> dict[str, Cacheable]: """Finds PCollections that need to be cached for the analyzed pipeline. There might be multiple pipelines defined and watched; this will only find diff --git a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py index 3dc866907a40..ade417e39690 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py +++ b/sdks/python/apache_beam/runners/interactive/sql/beam_sql_magics.py @@ -25,10 +25,7 @@ import keyword import logging import traceback -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union from IPython.core.magic import Magics @@ -126,7 +123,7 @@ def __init__(self): 'names and the SQL keywords, such as "SELECT", "FROM", "WHERE" and ' 'etc.')) - def parse(self, args: List[str]) -> Optional[argparse.Namespace]: + def parse(self, args: list[str]) -> Optional[argparse.Namespace]: """Parses a list of string inputs. The parsed namespace contains these attributes: @@ -246,7 +243,7 @@ def beam_sql(self, line: str, cell: Optional[str] = None) -> Optional[PValue]: @progress_indicated -def collect_data_for_local_run(query: str, found: Dict[str, beam.PCollection]): +def collect_data_for_local_run(query: str, found: dict[str, beam.PCollection]): from apache_beam.runners.interactive import interactive_beam as ib for name, pcoll in found.items(): try: @@ -269,8 +266,8 @@ def collect_data_for_local_run(query: str, found: Dict[str, beam.PCollection]): def apply_sql( query: str, output_name: Optional[str], - found: Dict[str, beam.PCollection], - run: bool = True) -> Tuple[str, Union[PValue, SqlNode], SqlChain]: + found: dict[str, beam.PCollection], + run: bool = True) -> tuple[str, Union[PValue, SqlNode], SqlChain]: """Applies a SqlTransform with the given sql and queried PCollections. Args: @@ -312,7 +309,7 @@ def apply_sql( def pcolls_from_streaming_cache( user_pipeline: beam.Pipeline, query_pipeline: beam.Pipeline, - name_to_pcoll: Dict[str, beam.PCollection]) -> Dict[str, beam.PCollection]: + name_to_pcoll: dict[str, beam.PCollection]) -> dict[str, beam.PCollection]: """Reads PCollection cache through the TestStream. Args: @@ -364,7 +361,7 @@ def exception_handler(e): def _generate_output_name( output_name: Optional[str], query: str, - found: Dict[str, beam.PCollection]) -> str: + found: dict[str, beam.PCollection]) -> str: """Generates a unique output name if None is provided. Otherwise, returns the given output name directly. @@ -379,11 +376,11 @@ def _generate_output_name( def _build_query_components( query: str, - found: Dict[str, beam.PCollection], + found: dict[str, beam.PCollection], output_name: str, run: bool = True -) -> Tuple[str, - Union[Dict[str, beam.PCollection], beam.PCollection, beam.Pipeline], +) -> tuple[str, + Union[dict[str, beam.PCollection], beam.PCollection, beam.Pipeline], SqlChain]: """Builds necessary components needed to apply the SqlTransform.
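Context for all of these substitutions: under PEP 585 (Python 3.9+), the builtin containers are subscriptable at runtime and report the same origin and arguments to `typing` introspection as the deprecated aliases do, which is why swapping `Dict`/`List`/`Tuple` for `dict`/`list`/`tuple` in signatures, dataclasses, and coder registrations is behavior-preserving. A self-contained check:

```python
import typing

# PEP 585: builtin containers are generic at runtime on Python 3.9+.
hint = dict[str, list[float]]
assert typing.get_origin(hint) is dict
assert typing.get_args(hint) == (str, list[float])

# The deprecated typing aliases report the same origin, which is what
# hint-driven machinery (e.g. schema and coder inference) inspects.
old_hint = typing.Dict[str, typing.List[float]]
assert typing.get_origin(old_hint) is dict
assert typing.get_args(old_hint) == (str, typing.List[float])
```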
diff --git a/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py b/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py index 9cafbb6c9039..30f9f4bdff53 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py +++ b/sdks/python/apache_beam/runners/interactive/sql/sql_chain.py @@ -26,9 +26,7 @@ import logging from dataclasses import dataclass from typing import Any -from typing import Dict from typing import Optional -from typing import Set from typing import Union import apache_beam as beam @@ -61,10 +59,10 @@ class SqlNode: execution_count: the execution count if in an IPython env. """ output_name: str - source: Union[beam.Pipeline, Set[str]] + source: Union[beam.Pipeline, set[str]] query: str - schemas: Set[Any] = None - evaluated: Set[beam.Pipeline] = None + schemas: set[Any] = None + evaluated: set[beam.Pipeline] = None next: Optional['SqlNode'] = None execution_count: int = 0 @@ -193,7 +191,7 @@ class SqlChain: Otherwise, at least some of the nodes in the chain have queried against PCollections. """ - nodes: Dict[str, SqlNode] = None + nodes: dict[str, SqlNode] = None root: Optional[SqlNode] = None current: Optional[SqlNode] = None user_pipeline: Optional[beam.Pipeline] = None diff --git a/sdks/python/apache_beam/runners/interactive/sql/utils.py b/sdks/python/apache_beam/runners/interactive/sql/utils.py index 2e46c0f23a7a..c2c0f4258d83 100644 --- a/sdks/python/apache_beam/runners/interactive/sql/utils.py +++ b/sdks/python/apache_beam/runners/interactive/sql/utils.py @@ -28,10 +28,8 @@ from dataclasses import dataclass from typing import Any from typing import Callable -from typing import Dict from typing import NamedTuple from typing import Optional -from typing import Type from typing import Union import apache_beam as beam @@ -72,8 +70,8 @@ def register_coder_for_schema( def find_pcolls( sql: str, - pcolls: Dict[str, beam.PCollection], - verbose: bool = False) -> Dict[str, beam.PCollection]: + pcolls: dict[str, beam.PCollection], + verbose: bool = False) -> dict[str, beam.PCollection]: """Finds all PCollections used in the given SQL query.
It does a simple word by word match and calls ib.collect for each PCollection @@ -120,7 +118,7 @@ def pformat_namedtuple(schema: NamedTuple) -> str: ])) -def pformat_dict(raw_input: Dict[str, Any]) -> str: +def pformat_dict(raw_input: dict[str, Any]) -> str: return '{{\n{}\n}}'.format( ',\n'.join(['{}: {}'.format(k, v) for k, v in raw_input.items()])) @@ -146,8 +144,8 @@ class OptionsEntry: """ label: str help: str - cls: Type[PipelineOptions] - arg_builder: Union[str, Dict[str, Optional[Callable]]] + cls: type[PipelineOptions] + arg_builder: Union[str, dict[str, Optional[Callable]]] default: Optional[str] = None def __post_init__(self): diff --git a/sdks/python/apache_beam/runners/interactive/testing/mock_env.py b/sdks/python/apache_beam/runners/interactive/testing/mock_env.py index 9b8f349d785a..d6f7f318a4f0 100644 --- a/sdks/python/apache_beam/runners/interactive/testing/mock_env.py +++ b/sdks/python/apache_beam/runners/interactive/testing/mock_env.py @@ -21,7 +21,6 @@ import unittest import uuid -from typing import Type from unittest.mock import patch from apache_beam.runners.interactive import interactive_environment as ie @@ -30,7 +29,7 @@ from apache_beam.runners.interactive.testing.mock_ipython import mock_get_ipython -def isolated_env(cls: Type[unittest.TestCase]): +def isolated_env(cls: type[unittest.TestCase]): """A class decorator for unittest.TestCase to set up an isolated test environment for Interactive Beam.""" class IsolatedInteractiveEnvironmentTest(cls): diff --git a/sdks/python/apache_beam/runners/interactive/utils.py b/sdks/python/apache_beam/runners/interactive/utils.py index 136fe372c214..399161fd3370 100644 --- a/sdks/python/apache_beam/runners/interactive/utils.py +++ b/sdks/python/apache_beam/runners/interactive/utils.py @@ -25,10 +25,7 @@ import logging from typing import Any from typing import Callable -from typing import Dict from typing import Iterator -from typing import List -from typing import Tuple from typing import Union import pandas as pd @@ -66,7 +63,7 @@ def to_element_list( include_window_info: bool, n: int = None, include_time_events: bool = False, -) -> List[WindowedValue]: +) -> list[WindowedValue]: """Returns an iterator that properly decodes the elements from the reader. """ @@ -107,7 +104,7 @@ def elements(): def elements_to_df( - elements: List[WindowedValue], + elements: list[WindowedValue], include_window_info: bool = False, element_type: Any = None) -> 'DataFrame': # noqa: F821 """Parses the given elements into a Dataframe. @@ -313,7 +310,7 @@ def deferred_df_to_pcollection(df): return to_pcollection(df, yield_elements='pandas', label=str(df._expr)), proxy -def pcoll_by_name() -> Dict[str, beam.PCollection]: +def pcoll_by_name() -> dict[str, beam.PCollection]: """Finds all PCollections by their variable names defined in the notebook.""" from apache_beam.runners.interactive import interactive_environment as ie @@ -340,7 +337,7 @@ def find_pcoll_name(pcoll: beam.PCollection) -> str: return None -def cacheables() -> Dict[CacheKey, Cacheable]: +def cacheables() -> dict[CacheKey, Cacheable]: """Finds all Cacheables with their CacheKeys.""" from apache_beam.runners.interactive import interactive_environment as ie @@ -422,7 +419,7 @@ def visit_transform(self, transform_node): def create_var_in_main(name: str, value: Any, - watch: bool = True) -> Tuple[str, Any]: + watch: bool = True) -> tuple[str, Any]: """Declares a variable in the main module. 
Args: diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py index f367598f9293..1f06a842fbb7 100644 --- a/sdks/python/apache_beam/runners/pipeline_context.py +++ b/sdks/python/apache_beam/runners/pipeline_context.py @@ -24,13 +24,10 @@ # mypy: disallow-untyped-defs from typing import Any -from typing import Dict -from typing import FrozenSet from typing import Generic from typing import Iterable from typing import Mapping from typing import Optional -from typing import Type from typing import TypeVar from typing import Union @@ -72,14 +69,14 @@ class _PipelineContextMap(Generic[PortableObjectT]): def __init__( self, context: 'PipelineContext', - obj_type: Type[PortableObjectT], + obj_type: type[PortableObjectT], namespace: str, proto_map: Optional[Mapping[str, message.Message]] = None) -> None: self._pipeline_context = context self._obj_type = obj_type self._namespace = namespace - self._obj_to_id: Dict[Any, str] = {} - self._id_to_obj: Dict[str, Any] = {} + self._obj_to_id: dict[Any, str] = {} + self._id_to_obj: dict[str, Any] = {} self._id_to_proto = dict(proto_map) if proto_map else {} def populate_map(self, proto_map: Mapping[str, message.Message]) -> None: @@ -131,7 +128,7 @@ def get_by_proto( obj=obj, obj_type=self._obj_type, label=label), maybe_new_proto) - def get_id_to_proto_map(self) -> Dict[str, message.Message]: + def get_id_to_proto_map(self) -> dict[str, message.Message]: return self._id_to_proto def get_proto_from_id(self, id: str) -> message.Message: @@ -232,7 +229,7 @@ def __init__( def add_requirement(self, requirement: str) -> None: self._requirements.add(requirement) - def requirements(self) -> FrozenSet[str]: + def requirements(self) -> frozenset[str]: return frozenset(self._requirements) # If fake coders are requested, return a pickled version of the element type @@ -288,14 +285,14 @@ def default_environment_id(self) -> str: return self._default_environment_id def get_environment_id_for_resource_hints( - self, hints: Dict[str, bytes]) -> str: + self, hints: dict[str, bytes]) -> str: """Returns an environment id that has necessary resource hints.""" if not hints: return self.default_environment_id() def get_or_create_environment_with_resource_hints( template_env_id: str, - resource_hints: Dict[str, bytes], + resource_hints: dict[str, bytes], ) -> str: """Creates an environment that has necessary hints and returns its id.""" template_env = self.environments.get_proto_from_id(template_env_id) diff --git a/sdks/python/apache_beam/runners/portability/abstract_job_service.py b/sdks/python/apache_beam/runners/portability/abstract_job_service.py index 3a3ad1507813..24aee6f619ab 100644 --- a/sdks/python/apache_beam/runners/portability/abstract_job_service.py +++ b/sdks/python/apache_beam/runners/portability/abstract_job_service.py @@ -26,10 +26,8 @@ import zipfile from concurrent import futures from typing import BinaryIO -from typing import Dict from typing import Iterator from typing import Optional -from typing import Tuple from typing import Union import grpc @@ -47,7 +45,7 @@ _LOGGER = logging.getLogger(__name__) -StateEvent = Tuple[int, Union[timestamp_pb2.Timestamp, Timestamp]] +StateEvent = tuple[int, Union[timestamp_pb2.Timestamp, Timestamp]] def make_state_event(state, timestamp): @@ -70,7 +68,7 @@ class AbstractJobServiceServicer(beam_job_api_pb2_grpc.JobServiceServicer): Servicer for the Beam Job API. 
""" def __init__(self): - self._jobs: Dict[str, AbstractBeamJob] = {} + self._jobs: dict[str, AbstractBeamJob] = {} def create_beam_job( self, @@ -272,7 +270,7 @@ def __init__(self, jar_path, root): def close(self): self._zipfile_handle.close() - def file_writer(self, path: str) -> Tuple[BinaryIO, str]: + def file_writer(self, path: str) -> tuple[BinaryIO, str]: """Given a relative path, returns an open handle that can be written to and an reference that can later be used to read this file.""" full_path = '%s/%s' % (self._root, path) diff --git a/sdks/python/apache_beam/runners/portability/artifact_service.py b/sdks/python/apache_beam/runners/portability/artifact_service.py index 60b89f3a424a..bbb69ada3e10 100644 --- a/sdks/python/apache_beam/runners/portability/artifact_service.py +++ b/sdks/python/apache_beam/runners/portability/artifact_service.py @@ -32,11 +32,8 @@ from typing import Any from typing import BinaryIO # pylint: disable=unused-import from typing import Callable -from typing import Dict -from typing import List from typing import MutableMapping from typing import Optional -from typing import Tuple from urllib.request import urlopen import grpc @@ -97,12 +94,12 @@ class ArtifactStagingService( beam_artifact_api_pb2_grpc.ArtifactStagingServiceServicer): def __init__( self, - file_writer: Callable[[str, Optional[str]], Tuple[BinaryIO, str]], + file_writer: Callable[[str, Optional[str]], tuple[BinaryIO, str]], ): self._lock = threading.Lock() - self._jobs_to_stage: Dict[ + self._jobs_to_stage: dict[ str, - Tuple[Dict[Any, List[beam_runner_api_pb2.ArtifactInformation]], + tuple[dict[Any, list[beam_runner_api_pb2.ArtifactInformation]], threading.Event]] = {} self._file_writer = file_writer @@ -110,7 +107,7 @@ def register_job( self, staging_token: str, dependency_sets: MutableMapping[ - Any, List[beam_runner_api_pb2.ArtifactInformation]]): + Any, list[beam_runner_api_pb2.ArtifactInformation]]): if staging_token in self._jobs_to_stage: raise ValueError('Already staging %s' % staging_token) with self._lock: diff --git a/sdks/python/apache_beam/runners/portability/expansion_service_test.py b/sdks/python/apache_beam/runners/portability/expansion_service_test.py index 7aa2e5f16e5b..b0b0b2dd2bfa 100644 --- a/sdks/python/apache_beam/runners/portability/expansion_service_test.py +++ b/sdks/python/apache_beam/runners/portability/expansion_service_test.py @@ -159,7 +159,7 @@ def expand(self, pcoll): return pcoll \ | beam.CoGroupByKey() \ | beam.ParDo(self.ConcatFn()).with_output_types( - typing.Tuple[int, typing.Iterable[str]]) + tuple[int, typing.Iterable[str]]) def to_runner_api_parameter(self, unused_context): return TEST_CGBK_URN, None diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py index ec9f5dd15ba8..d2f0f97a8a37 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/execution.py @@ -24,23 +24,17 @@ import itertools import logging import struct -import typing import uuid import weakref from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import DefaultDict -from typing import Dict from typing import Generic from typing import Iterable from typing import Iterator -from typing import List from typing import MutableMapping from typing import Optional from typing import Sequence -from typing import Set -from typing import Tuple from typing import 
TypeVar from typing import Union @@ -108,7 +102,7 @@ class PartitionableBuffer(Buffer, Protocol): def copy(self) -> 'PartitionableBuffer': pass - def partition(self, n: int) -> List[List[bytes]]: + def partition(self, n: int) -> list[list[bytes]]: pass @property @@ -126,8 +120,8 @@ class ListBuffer: """Used to support partitioning of a list.""" def __init__(self, coder_impl: Optional[CoderImpl]) -> None: self._coder_impl = coder_impl or CoderImpl() - self._inputs: List[bytes] = [] - self._grouped_output: Optional[List[List[bytes]]] = None + self._inputs: list[bytes] = [] + self._grouped_output: Optional[list[list[bytes]]] = None self.cleared = False def copy(self) -> 'ListBuffer': @@ -150,7 +144,7 @@ def append(self, element: bytes) -> None: raise RuntimeError('ListBuffer append after read.') self._inputs.append(element) - def partition(self, n: int) -> List[List[bytes]]: + def partition(self, n: int) -> list[list[bytes]]: if self.cleared: raise RuntimeError('Trying to partition a cleared ListBuffer.') if len(self._inputs) >= n or len(self._inputs) == 0: @@ -198,9 +192,11 @@ def __init__( self._key_coder = pre_grouped_coder.key_coder() self._pre_grouped_coder = pre_grouped_coder self._post_grouped_coder = post_grouped_coder - self._table: DefaultDict[bytes, List[Any]] = collections.defaultdict(list) + self._table: collections.defaultdict[bytes, list[Any]] = collections.defaultdict( list) self._windowing = windowing - self._grouped_output: Optional[List[List[bytes]]] = None + self._grouped_output: Optional[list[list[bytes]]] = None def copy(self) -> 'GroupingBuffer': # This is a silly temporary optimization. This class must be removed once @@ -234,7 +230,7 @@ def extend(self, input_buffer: Buffer) -> None: for key, values in input_buffer._table.items(): self._table[key].extend(values) - def partition(self, n: int) -> List[List[bytes]]: + def partition(self, n: int) -> list[list[bytes]]: """ It is used to partition a _GroupingBuffer into N parts. Once partitioned, it will not be re-partitioned with a different N. Re-partitioning is not currently supported.
@@ -311,9 +307,8 @@ def __init__( raise ValueError("Unknown access pattern: '%s'" % access_pattern.urn) self._windowed_value_coder = coder self._window_coder = coder.window_coder - self._values_by_window: DefaultDict[Tuple[str, BoundedWindow], - List[Any]] = collections.defaultdict( - list) + self._values_by_window: collections.defaultdict[ + tuple[str, BoundedWindow], list[Any]] = collections.defaultdict(list) def append(self, elements_data: bytes) -> None: input_stream = create_InputStream(elements_data) @@ -326,7 +321,7 @@ def append(self, elements_data: bytes) -> None: for window in windowed_value.windows: self._values_by_window[key, window].append(value) - def encoded_items(self) -> Iterator[Tuple[bytes, bytes, bytes, int]]: + def encoded_items(self) -> Iterator[tuple[bytes, bytes, bytes, int]]: value_coder_impl = self._value_coder.get_impl() key_coder_impl = self._key_coder.get_impl() for (key, window), values in self._values_by_window.items(): @@ -385,13 +380,13 @@ class _ProcessingQueueManager(object): """ class KeyedQueue(Generic[QUEUE_KEY_TYPE]): def __init__(self) -> None: - self._q: typing.Deque[Tuple[QUEUE_KEY_TYPE, - DataInput]] = collections.deque() + self._q: collections.deque[tuple[QUEUE_KEY_TYPE, + DataInput]] = collections.deque() self._keyed_elements: MutableMapping[QUEUE_KEY_TYPE, - Tuple[QUEUE_KEY_TYPE, + tuple[QUEUE_KEY_TYPE, DataInput]] = {} - def enque(self, elm: Tuple[QUEUE_KEY_TYPE, DataInput]) -> None: + def enque(self, elm: tuple[QUEUE_KEY_TYPE, DataInput]) -> None: key = elm[0] incoming_inputs: DataInput = elm[1] if not incoming_inputs: @@ -415,7 +410,7 @@ def enque(self, elm: Tuple[QUEUE_KEY_TYPE, DataInput]) -> None: self._keyed_elements[key] = elm self._q.appendleft(elm) - def deque(self) -> Tuple[QUEUE_KEY_TYPE, DataInput]: + def deque(self) -> tuple[QUEUE_KEY_TYPE, DataInput]: elm = self._q.pop() key = elm[0] del self._keyed_elements[key] @@ -434,9 +429,9 @@ def __str__(self) -> str: def __init__(self) -> None: # For time-pending and watermark-pending inputs, the key type is # STAGE+TIMESTAMP, while for the ready inputs, the key type is only STAGE. - self.time_pending_inputs = _ProcessingQueueManager.KeyedQueue[Tuple[ + self.time_pending_inputs = _ProcessingQueueManager.KeyedQueue[tuple[ str, Timestamp]]() - self.watermark_pending_inputs = _ProcessingQueueManager.KeyedQueue[Tuple[ + self.watermark_pending_inputs = _ProcessingQueueManager.KeyedQueue[tuple[ str, Timestamp]]() self.ready_inputs = _ProcessingQueueManager.KeyedQueue[str]() @@ -451,7 +446,7 @@ class GenericMergingWindowFn(window.WindowFn): TO_SDK_TRANSFORM = 'read' FROM_SDK_TRANSFORM = 'write' - _HANDLES: Dict[str, 'GenericMergingWindowFn'] = {} + _HANDLES: dict[str, 'GenericMergingWindowFn'] = {} def __init__( self, @@ -664,14 +659,14 @@ class FnApiRunnerExecutionContext(object): """ def __init__( self, - stages: List[translations.Stage], + stages: list[translations.Stage], worker_handler_manager: 'worker_handlers.WorkerHandlerManager', pipeline_components: beam_runner_api_pb2.Components, safe_coders: translations.SafeCoderMapping, - data_channel_coders: Dict[str, str], + data_channel_coders: dict[str, str], num_workers: int, uses_teststream: bool = False, - split_managers: Sequence[Tuple[str, Callable[[int], + split_managers: Sequence[tuple[str, Callable[[int], Iterable[float]]]] = () ) -> None: """ @@ -705,7 +700,7 @@ def __init__( Optional[str]] = {} # Map of buffer_id to its consumers. A consumer is the pair of # Stage name + Ptransform name that consume that buffer. 
- self.buffer_id_to_consumer_pairs: Dict[bytes, Set[Tuple[str, str]]] = {} + self.buffer_id_to_consumer_pairs: dict[bytes, set[tuple[str, str]]] = {} self._compute_pipeline_dictionaries() self.watermark_manager = WatermarkManager(stages) @@ -721,7 +716,7 @@ def __init__( for id in self.pipeline_components.windowing_strategies.keys() } - self._stage_managers: Dict[str, BundleContextManager] = {} + self._stage_managers: dict[str, BundleContextManager] = {} def bundle_manager_for( self, @@ -834,14 +829,14 @@ def _build_data_side_inputs_map( patterns for all of the outputs of a stage that will be consumed as a side input. """ - transform_consumers: DefaultDict[ + transform_consumers: collections.defaultdict[ str, - List[beam_runner_api_pb2.PTransform]] = collections.defaultdict(list) - stage_consumers: DefaultDict[ - str, List[translations.Stage]] = collections.defaultdict(list) + list[beam_runner_api_pb2.PTransform]] = collections.defaultdict(list) + stage_consumers: collections.defaultdict[ + str, list[translations.Stage]] = collections.defaultdict(list) - def get_all_side_inputs() -> Set[str]: - all_side_inputs: Set[str] = set() + def get_all_side_inputs() -> set[str]: + all_side_inputs: set[str] = set() for stage in stages: for transform in stage.transforms: for input in transform.inputs.values(): @@ -852,7 +847,7 @@ def get_all_side_inputs() -> Set[str]: return all_side_inputs all_side_inputs = frozenset(get_all_side_inputs()) - data_side_inputs_by_producing_stage: Dict[str, DataSideInput] = {} + data_side_inputs_by_producing_stage: dict[str, DataSideInput] = {} producing_stages_by_pcoll = {} @@ -996,7 +991,7 @@ def __init__( execution_context: FnApiRunnerExecutionContext, stage: translations.Stage, num_workers: int, - split_managers: Sequence[Tuple[str, Callable[[int], Iterable[float]]]], + split_managers: Sequence[tuple[str, Callable[[int], Iterable[float]]]], ) -> None: self.execution_context = execution_context self.stage = stage @@ -1007,11 +1002,11 @@ def __init__( # Properties that are lazily initialized self._process_bundle_descriptor: Optional[ beam_fn_api_pb2.ProcessBundleDescriptor] = None - self._worker_handlers: Optional[List[worker_handlers.WorkerHandler]] = None + self._worker_handlers: Optional[list[worker_handlers.WorkerHandler]] = None # a mapping of {(transform_id, timer_family_id): timer_coder_id}. The map # is built after self._process_bundle_descriptor is initialized. # This field can be used to tell whether current bundle has timers. 
- self._timer_coder_ids: Optional[Dict[Tuple[str, str], str]] = None + self._timer_coder_ids: Optional[dict[tuple[str, str], str]] = None # A mapping from transform_name to Buffer ID self.stage_data_outputs: DataOutput = {} @@ -1033,7 +1028,7 @@ def _compute_expected_outputs(self) -> None: create_buffer_id(timer_family_id, 'timers'), time_domain) @property - def worker_handlers(self) -> List['worker_handlers.WorkerHandler']: + def worker_handlers(self) -> list['worker_handlers.WorkerHandler']: if self._worker_handlers is None: self._worker_handlers = ( self.execution_context.worker_handler_manager.get_worker_handlers( @@ -1088,7 +1083,7 @@ def get_input_coder_impl(self, transform_id: str) -> CoderImpl: assert coder_id return self.get_coder_impl(coder_id) - def _build_timer_coders_id_map(self) -> Dict[Tuple[str, str], str]: + def _build_timer_coders_id_map(self) -> dict[tuple[str, str], str]: assert self._process_bundle_descriptor is not None timer_coder_ids = {} for transform_id, transform_proto in (self._process_bundle_descriptor diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py index fdf291cb6f12..32997441a48e 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner.py @@ -32,16 +32,11 @@ import threading import time from typing import Callable -from typing import Dict from typing import Iterable from typing import Iterator -from typing import List from typing import Mapping from typing import MutableMapping from typing import Optional -from typing import Set -from typing import Tuple -from typing import Type from typing import TypeVar from typing import Union @@ -133,7 +128,7 @@ def __init__( retrieval_token='unused-retrieval-token')) @staticmethod - def supported_requirements() -> Tuple[str, ...]: + def supported_requirements() -> tuple[str, ...]: return ( common_urns.requirements.REQUIRES_STATEFUL_PROCESSING.urn, common_urns.requirements.REQUIRES_BUNDLE_FINALIZATION.urn, @@ -380,7 +375,7 @@ def _check_requirements( def create_stages( self, pipeline_proto: beam_runner_api_pb2.Pipeline - ) -> Tuple[translations.TransformContext, List[translations.Stage]]: + ) -> tuple[translations.TransformContext, list[translations.Stage]]: return translations.create_and_optimize_stages( copy.deepcopy(pipeline_proto), phases=[ @@ -409,7 +404,7 @@ def create_stages( def run_stages( self, stage_context: translations.TransformContext, - stages: List[translations.Stage]) -> 'RunnerResult': + stages: list[translations.Stage]) -> 'RunnerResult': """Run a list of topologically-sorted stages in batch mode. Args: @@ -607,7 +602,7 @@ def _run_bundle_multiple_times_for_testing( @staticmethod def _collect_written_timers( bundle_context_manager: execution.BundleContextManager - ) -> Tuple[Dict[translations.TimerFamilyId, timestamp.Timestamp], + ) -> tuple[dict[translations.TimerFamilyId, timestamp.Timestamp], OutputTimerData]: """Review output buffers, and collect written timers. @@ -670,7 +665,7 @@ def _add_sdk_delayed_applications_to_deferred_inputs( bundle_context_manager: execution.BundleContextManager, bundle_result: beam_fn_api_pb2.InstructionResponse, deferred_inputs: MutableMapping[str, execution.PartitionableBuffer] - ) -> Set[str]: + ) -> set[str]: """Returns a set of PCollection IDs of PColls having delayed applications. 
This transform inspects the bundle_context_manager and bundle_result @@ -697,11 +692,11 @@ def _add_sdk_delayed_applications_to_deferred_inputs( def _add_residuals_and_channel_splits_to_deferred_inputs( self, - splits: List[beam_fn_api_pb2.ProcessBundleSplitResponse], + splits: list[beam_fn_api_pb2.ProcessBundleSplitResponse], bundle_context_manager: execution.BundleContextManager, last_sent: MutableMapping[str, execution.PartitionableBuffer], deferred_inputs: MutableMapping[str, execution.PartitionableBuffer] - ) -> Tuple[Set[str], Set[str]]: + ) -> tuple[set[str], set[str]]: """Returns two sets representing PCollections with watermark holds. The first set represents PCollections with delayed root applications. @@ -710,7 +705,7 @@ def _add_residuals_and_channel_splits_to_deferred_inputs( pcolls_with_delayed_apps = set() transforms_with_channel_splits = set() - prev_stops: Dict[str, int] = {} + prev_stops: dict[str, int] = {} for split in splits: for delayed_application in split.residual_roots: producer_name = bundle_context_manager.input_for( @@ -927,8 +922,8 @@ def _get_bundle_manager( cache_token_generator = FnApiRunner.get_cache_token_generator(static=False) if bundle_context_manager.num_workers == 1: # Avoid thread/processor pools for increased performance and debuggability. - bundle_manager_type: Union[Type[BundleManager], Type[ParallelBundleManager]] = BundleManager + bundle_manager_type: Union[type[BundleManager], type[ParallelBundleManager]] = BundleManager elif bundle_context_manager.stage.is_stateful(): # State is keyed, and a single key cannot be processed concurrently. # Alternatively, we could arrange to partition work by key. @@ -946,11 +941,11 @@ def _build_watermark_updates( runner_execution_context: execution.FnApiRunnerExecutionContext, stage_inputs: Iterable[str], expected_timers: Iterable[translations.TimerFamilyId], - pcolls_with_da: Set[str], - transforms_w_splits: Set[str], - watermarks_by_transform_and_timer_family: Dict[translations.TimerFamilyId, + pcolls_with_da: set[str], + transforms_w_splits: set[str], + watermarks_by_transform_and_timer_family: dict[translations.TimerFamilyId, timestamp.Timestamp] - ) -> Dict[Union[str, translations.TimerFamilyId], timestamp.Timestamp]: + ) -> dict[Union[str, translations.TimerFamilyId], timestamp.Timestamp]: """Builds a dictionary of PCollection (or TimerFamilyId) to timestamp. Args: @@ -965,7 +960,7 @@ def _build_watermark_updates( watermarks_by_transform_and_timer_family: represents the set of watermark holds to be added for each timer family.
""" - updates: Dict[Union[str, translations.TimerFamilyId], + updates: dict[Union[str, translations.TimerFamilyId], timestamp.Timestamp] = {} def get_pcoll_id(transform_id): @@ -1016,10 +1011,10 @@ def _run_bundle( data_output: DataOutput, expected_timer_output: OutputTimers, bundle_manager: 'BundleManager' - ) -> Tuple[beam_fn_api_pb2.InstructionResponse, - Dict[str, execution.PartitionableBuffer], + ) -> tuple[beam_fn_api_pb2.InstructionResponse, + dict[str, execution.PartitionableBuffer], OutputTimerData, - Dict[Union[str, translations.TimerFamilyId], timestamp.Timestamp]]: + dict[Union[str, translations.TimerFamilyId], timestamp.Timestamp]]: """Execute a bundle, and return a result object, and deferred inputs.""" data_input = bundle_input.data input_timers = bundle_input.timers @@ -1038,7 +1033,7 @@ def _run_bundle( # - timers # - SDK-initiated deferred applications of root elements # - Runner-initiated deferred applications of root elements - deferred_inputs: Dict[str, execution.PartitionableBuffer] = {} + deferred_inputs: dict[str, execution.PartitionableBuffer] = {} watermarks_by_transform_and_timer_family, newly_set_timers = ( self._collect_written_timers(bundle_context_manager)) @@ -1252,8 +1247,8 @@ def _generate_splits_for_testing( self, split_manager, inputs: Mapping[str, execution.PartitionableBuffer], - process_bundle_id) -> List[beam_fn_api_pb2.ProcessBundleSplitResponse]: - split_results: List[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] + process_bundle_id) -> list[beam_fn_api_pb2.ProcessBundleSplitResponse]: + split_results: list[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] read_transform_id, buffer_data = only_element(inputs.items()) byte_stream = b''.join(buffer_data or []) num_elements = len( @@ -1359,7 +1354,7 @@ def process_bundle( cache_tokens=[next(self._cache_token_generator)])) result_future = self._worker_handler.control_conn.push(process_bundle_req) - split_results: List[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] + split_results: list[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] with ProgressRequester(self._worker_handler, process_bundle_id, self._progress_frequency): @@ -1368,8 +1363,8 @@ def process_bundle( split_results = self._generate_splits_for_testing( split_manager, inputs, process_bundle_id) - expect_reads: List[Union[str, - Tuple[str, + expect_reads: list[Union[str, + tuple[str, str]]] = list(expected_outputs.keys()) expect_reads.extend(list(expected_output_timers.keys())) @@ -1433,8 +1428,8 @@ def process_bundle( expected_output_timers: OutputTimers, dry_run: bool = False, ) -> BundleProcessResult: - part_inputs: List[Dict[str, - List[bytes]]] = [{} + part_inputs: list[dict[str, + list[bytes]]] = [{} for _ in range(self._num_workers)] # Timers are only executed on the first worker # TODO(BEAM-9741): Split timers to multiple workers @@ -1446,7 +1441,7 @@ def process_bundle( part_inputs[ix][name] = part merged_result: Optional[beam_fn_api_pb2.InstructionResponse] = None - split_result_list: List[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] + split_result_list: list[beam_fn_api_pb2.ProcessBundleSplitResponse] = [] def execute(part_map_input_timers) -> BundleProcessResult: part_map, input_timers = part_map_input_timers @@ -1581,7 +1576,7 @@ def query(self, filter=None): self.BOUNDED_TRIES: bounded_tries, } - def monitoring_infos(self) -> List[metrics_pb2.MonitoringInfo]: + def monitoring_infos(self) -> list[metrics_pb2.MonitoringInfo]: return [ item for sublist in self._monitoring_infos.values() for item in sublist ] diff --git 
a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py index 8bae82f0aaaf..72a5ed4412ca 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/fn_runner_test.py @@ -31,10 +31,7 @@ import unittest import uuid from typing import Any -from typing import Dict from typing import Iterator -from typing import List -from typing import Tuple from typing import no_type_check import hamcrest # pylint: disable=ungrouped-imports @@ -945,7 +942,7 @@ def is_buffered_correctly(actual): | beam.WindowInto( window.FixedWindows(1) if windowed else window.GlobalWindows()) | beam.Map(lambda x: (key, x)).with_output_types( - Tuple[key_type if key_type else type(key), Any]) + tuple[key_type if key_type else type(key), Any]) | beam.ParDo(BufferDoFn())) assert_that(actual, is_buffered_correctly) @@ -2440,7 +2437,7 @@ def __reduce__(self): return _unpickle_element_counter, (name, ) -_pickled_element_counters: Dict[str, ElementCounter] = {} +_pickled_element_counters: dict[str, ElementCounter] = {} def _unpickle_element_counter(name): @@ -2679,8 +2676,8 @@ def infer_output_type(self, input_type): class ListPlusOneDoFn(beam.DoFn): - def process_batch(self, batch: List[np.int64], *unused_args, - **unused_kwargs) -> Iterator[List[np.int64]]: + def process_batch(self, batch: list[np.int64], *unused_args, + **unused_kwargs) -> Iterator[list[np.int64]]: assert isinstance(batch, list) yield [element + 1 for element in batch] diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index aadf5cfaa866..7261a815f7e4 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -33,16 +33,11 @@ from typing import Collection from typing import Container from typing import DefaultDict -from typing import Dict -from typing import FrozenSet from typing import Iterable from typing import Iterator -from typing import List from typing import MutableMapping from typing import NamedTuple from typing import Optional -from typing import Set -from typing import Tuple from typing import TypeVar from typing import Union @@ -59,7 +54,6 @@ from apache_beam.utils import timestamp if TYPE_CHECKING: - from apache_beam.runners.portability.fn_api_runner.execution import ListBuffer from apache_beam.runners.portability.fn_api_runner.execution import PartitionableBuffer T = TypeVar('T') @@ -90,39 +84,39 @@ # TimerFamilyId is identified by transform name + timer family # TODO(pabloem): Rename this type to express this name is unique per pipeline. -TimerFamilyId = Tuple[str, str] +TimerFamilyId = tuple[str, str] BufferId = bytes # SideInputId is identified by a consumer ParDo + tag. -SideInputId = Tuple[str, str] +SideInputId = tuple[str, str] SideInputAccessPattern = beam_runner_api_pb2.FunctionSpec # A map from a PCollection coder ID to a Safe Coder ID # A safe coder is a coder that can be used on the runner-side of the FnApi. # A safe coder receives a byte string, and returns a type that can be # understood by the runner when deserializing. 
-SafeCoderMapping = dict[str, str] +SafeCoderMapping = dict[str, str] # DataSideInput maps SideInputIds to a tuple of the encoded bytes of the side # input content, and a payload specification regarding the type of side input # (MultiMap / Iterable). -DataSideInput = Dict[SideInputId, Tuple[bytes, SideInputAccessPattern]] +DataSideInput = dict[SideInputId, tuple[bytes, SideInputAccessPattern]] -DataOutput = Dict[str, BufferId] +DataOutput = dict[str, BufferId] # A map of [Transform ID, Timer Family ID] to [Buffer ID, Time Domain for timer] # The time domain comes from beam_runner_api_pb2.TimeDomain. It may be # EVENT_TIME or PROCESSING_TIME. -OutputTimers = MutableMapping[TimerFamilyId, Tuple[BufferId, Any]] +OutputTimers = MutableMapping[TimerFamilyId, tuple[BufferId, Any]] # A map of [Transform ID, Timer Family ID] to [Buffer CONTENTS, Timestamp] OutputTimerData = MutableMapping[TimerFamilyId, - Tuple['PartitionableBuffer', + tuple['PartitionableBuffer', timestamp.Timestamp]] -BundleProcessResult = Tuple[beam_fn_api_pb2.InstructionResponse, - List[beam_fn_api_pb2.ProcessBundleSplitResponse]] +BundleProcessResult = tuple[beam_fn_api_pb2.InstructionResponse, - list[beam_fn_api_pb2.ProcessBundleSplitResponse]] # TODO(pabloem): Change the name to a more representative one @@ -902,7 +896,7 @@ def _group_stages_by_key(stages, get_stage_key): def _group_stages_with_limit(stages, get_limit): # type: (Iterable[Stage], Callable[[str], int]) -> Iterable[Collection[Stage]] stages_with_limit = [(stage, get_limit(stage.name)) for stage in stages] - group: List[Stage] = [] + group: list[Stage] = [] group_limit = 0 for stage, limit in sorted(stages_with_limit, key=operator.itemgetter(1)): if limit < 1: diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager.py index 021f5950d71d..0bdcf2ac8e50 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/trigger_manager.py @@ -99,7 +99,7 @@ def __init__( super().__init__(all_windows) self.trigger_context = context self.windowing = windowing - self.merged_away: typing.Dict[BoundedWindow, BoundedWindow] = {} + self.merged_away: dict[BoundedWindow, BoundedWindow] = {} def merge(self, to_be_merged, merge_result): _LOGGER.debug("Merging %s into %s", to_be_merged, merge_result) @@ -117,9 +117,9 @@ def merge(self, to_be_merged, merge_result): @typehints.with_input_types( - typing.Tuple[K, typing.Iterable[windowed_value.WindowedValue]]) + tuple[K, typing.Iterable[windowed_value.WindowedValue]]) @typehints.with_output_types( - typing.Tuple[K, typing.Iterable[windowed_value.WindowedValue]]) + tuple[K, typing.Iterable[windowed_value.WindowedValue]]) class GeneralTriggerManagerDoFn(DoFn): """A trigger manager that supports all windowing / triggering cases.
@@ -153,7 +153,7 @@ def __init__(self, windowing: Windowing): def process( self, - element: typing.Tuple[K, typing.Iterable[windowed_value.WindowedValue]], + element: tuple[K, typing.Iterable[windowed_value.WindowedValue]], all_elements: BagRuntimeState = DoFn.StateParam(WINDOW_ELEMENT_PAIRS), # type: ignore latest_processing_time: AccumulatingRuntimeState = DoFn.StateParam(LAST_KNOWN_TIME), # type: ignore latest_watermark: AccumulatingRuntimeState = DoFn.StateParam( # type: ignore @@ -226,7 +226,7 @@ def _fire_eligible_windows( timestamp: Timestamp, timer_tag: typing.Optional[str], context: 'FnRunnerStatefulTriggerContext', - windows_of_interest: typing.Optional[typing.Set[BoundedWindow]] = None): + windows_of_interest: typing.Optional[set[BoundedWindow]] = None): windows_to_elements = context.windows_to_elements_map() context.all_elements_state.clear() @@ -254,7 +254,7 @@ def _fire_eligible_windows( elems = [WindowedValue(e.value, e.timestamp, (w, )) for e in elems] yield (key, elems) - finished_windows: typing.Set[BoundedWindow] = set( + finished_windows: set[BoundedWindow] = set( context.finished_windows_state.read()) # Add elements that were not fired back into state. for w, elems in windows_to_elements.items(): @@ -341,13 +341,11 @@ def __init__( self.finished_windows_state = finished_windows_state def windows_to_elements_map( - self - ) -> typing.Dict[BoundedWindow, typing.List[windowed_value.WindowedValue]]: - window_element_pairs: typing.Iterable[typing.Tuple[ - BoundedWindow, - windowed_value.WindowedValue]] = self.all_elements_state.read() - result: typing.Dict[BoundedWindow, - typing.List[windowed_value.WindowedValue]] = {} + self) -> dict[BoundedWindow, list[windowed_value.WindowedValue]]: + window_element_pairs: typing.Iterable[ + tuple[BoundedWindow, + windowed_value.WindowedValue]] = self.all_elements_state.read() + result: dict[BoundedWindow, list[windowed_value.WindowedValue]] = {} for w, e in window_element_pairs: if w not in result: result[w] = [] diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/visualization_tools.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/visualization_tools.py index 28efa0788b05..85aa11af4480 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/visualization_tools.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/visualization_tools.py @@ -16,8 +16,6 @@ # """Set of utilities to visualize a pipeline to be executed by FnApiRunner.""" -from typing import Set -from typing import Tuple from apache_beam.runners.portability.fn_api_runner.translations import Stage from apache_beam.runners.portability.fn_api_runner.watermark_manager import WatermarkManager @@ -89,8 +87,8 @@ def add_links(link_from=None, link_to=None, edge_style="solid"): g.edge(link_from, link_to, style=edge_style) seen_links.add((link_to, link_from, edge_style)) - seen_nodes: Set[str] = set() - seen_links: Set[Tuple[str, str]] = set() + seen_nodes: set[str] = set() + seen_links: set[tuple[str, str]] = set() for node in watermark_manager._stages_by_name.values(): name = 'STAGE_%s...%s' % (node.name[:30], node.name[-30:]) add_node(name, 'box') diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/watermark_manager.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/watermark_manager.py index 106eca108297..a61abb41e3e5 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/watermark_manager.py +++ 
b/sdks/python/apache_beam/runners/portability/fn_api_runner/watermark_manager.py @@ -19,9 +19,6 @@ from __future__ import absolute_import -from typing import Dict -from typing import List -from typing import Set from typing import Union from apache_beam.portability.api import beam_runner_api_pb2 @@ -40,7 +37,7 @@ class PCollectionNode(object): def __init__(self, name): self.name = name self._watermark = timestamp.MIN_TIMESTAMP - self.producers: Set[WatermarkManager.StageNode] = set() + self.producers: set[WatermarkManager.StageNode] = set() self.consumers = 0 self._fully_consumed_by = 0 self._produced_watermark = timestamp.MIN_TIMESTAMP @@ -77,9 +74,9 @@ def __init__(self, name): # the output watermark of the stage, because only the main input # can actually advance that watermark. self.name = name - self.inputs: Set[WatermarkManager.PCollectionNode] = set() - self.side_inputs: Set[WatermarkManager.PCollectionNode] = set() - self.outputs: Set[WatermarkManager.PCollectionNode] = set() + self.inputs: set[WatermarkManager.PCollectionNode] = set() + self.side_inputs: set[WatermarkManager.PCollectionNode] = set() + self.outputs: set[WatermarkManager.PCollectionNode] = set() def __str__(self): return 'StageNode<...>' - def __init__(self, stages: List[translations.Stage]) -> None: - self._pcollections_by_name: Dict[Union[str, translations.TimerFamilyId], + def __init__(self, stages: list[translations.Stage]) -> None: + self._pcollections_by_name: dict[Union[str, translations.TimerFamilyId], WatermarkManager.PCollectionNode] = {} - self._stages_by_name: Dict[str, WatermarkManager.StageNode] = {} + self._stages_by_name: dict[str, WatermarkManager.StageNode] = {} def add_pcollection( pcname: str, @@ -179,7 +176,7 @@ def add_pcollection( self._verify(stages) - def _verify(self, stages: List[translations.Stage]): + def _verify(self, stages: list[translations.Stage]): for s in stages: if len(self._stages_by_name[s.name].inputs) == 0: from apache_beam.runners.portability.fn_api_runner import visualization_tools diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py index d79b381f2d78..915134d3afa8 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/worker_handlers.py @@ -34,14 +34,10 @@ from typing import BinaryIO # pylint: disable=unused-import from typing import Callable from typing import DefaultDict -from typing import Dict from typing import Iterable from typing import Iterator -from typing import List from typing import Mapping from typing import Optional -from typing import Tuple -from typing import Type from typing import TypeVar from typing import Union from typing import cast @@ -77,7 +73,6 @@ if TYPE_CHECKING: from google.protobuf import message - from grpc import ServicerContext from apache_beam.runners.portability.fn_api_runner.fn_runner import \ ExtendedProvisionInfo # pylint: disable=ungrouped-imports @@ -1034,7 +1029,7 @@ def append(self, item): def extend(self, other: Buffer) -> None: raise NotImplementedError() - StateType = Union[CopyOnWriteState, DefaultDict[bytes, Buffer]] + StateType = Union[CopyOnWriteState, collections.defaultdict[bytes, Buffer]] def __init__(self): # type: () -> None diff --git a/sdks/python/apache_beam/runners/portability/local_job_service.py b/sdks/python/apache_beam/runners/portability/local_job_service.py index 9d85e4d1e664..daa12f92202f 100644 ---
a/sdks/python/apache_beam/runners/portability/local_job_service.py +++ b/sdks/python/apache_beam/runners/portability/local_job_service.py @@ -28,7 +28,6 @@ import time import traceback from typing import Any -from typing import List from typing import Mapping from typing import Optional @@ -248,7 +247,7 @@ def __init__( self._provision_info = provision_info self._artifact_staging_endpoint = artifact_staging_endpoint self._artifact_service = artifact_service - self._state_queues: List[queue.Queue] = [] + self._state_queues: list[queue.Queue] = [] self._log_queues = JobLogQueues() self.daemon = True self.result = None @@ -374,7 +373,7 @@ def Logging(self, log_bundles, context=None): class JobLogQueues(object): def __init__(self): - self._queues: List[queue.Queue] = [] + self._queues: list[queue.Queue] = [] self._cache = [] self._cache_size = 10 self._lock = threading.Lock() @@ -455,7 +454,7 @@ def emit(self, record): def _extract_dependency_sets( envs: Mapping[str, beam_runner_api_pb2.Environment] -) -> Mapping[Any, List[beam_runner_api_pb2.ArtifactInformation]]: +) -> Mapping[Any, list[beam_runner_api_pb2.ArtifactInformation]]: """Expands the set of environments into a mapping of (opaque) keys to dependency sets. This is not 1:1 in the case of AnyOf environments. @@ -472,7 +471,7 @@ def dependencies_iter(): def _update_dependency_sets( envs: Mapping[str, beam_runner_api_pb2.Environment], - resolved_deps: Mapping[Any, List[beam_runner_api_pb2.ArtifactInformation]]): + resolved_deps: Mapping[Any, list[beam_runner_api_pb2.ArtifactInformation]]): """Takes the mapping of beam Environments (originally passed to `_extract_dependency_sets`) and a set of (key-wise) updated dependencies, and updates the original environment protos to contain the updated diff --git a/sdks/python/apache_beam/runners/portability/portable_runner.py b/sdks/python/apache_beam/runners/portability/portable_runner.py index 45d727371263..22c3440b69ed 100644 --- a/sdks/python/apache_beam/runners/portability/portable_runner.py +++ b/sdks/python/apache_beam/runners/portability/portable_runner.py @@ -26,10 +26,8 @@ import threading import time from typing import Any -from typing import Dict from typing import Iterator from typing import Optional -from typing import Tuple import grpc from google.protobuf import struct_pb2 @@ -96,7 +94,7 @@ def __init__(self, job_service, options, retain_unknown_options=False): def submit( self, proto_pipeline: beam_runner_api_pb2.Pipeline - ) -> Tuple[str, + ) -> tuple[str, Iterator[beam_job_api_pb2.JobStateEvent], Iterator[beam_job_api_pb2.JobMessagesResponse]]: """ @@ -171,7 +169,7 @@ def add_runner_options(parser): @staticmethod def encode_pipeline_options( - all_options: Dict[str, Any]) -> 'struct_pb2.Struct': + all_options: dict[str, Any]) -> 'struct_pb2.Struct': def convert_pipeline_option_value(v): # convert int values: BEAM-5509 if type(v) == int: @@ -215,7 +213,7 @@ def stage( def run( self, preparation_id: str - ) -> Tuple[str, + ) -> tuple[str, Iterator[beam_job_api_pb2.JobStateEvent], Iterator[beam_job_api_pb2.JobMessagesResponse]]: """Run the job""" diff --git a/sdks/python/apache_beam/runners/portability/prism_runner.py b/sdks/python/apache_beam/runners/portability/prism_runner.py index d2164cfecd10..0458898b5e96 100644 --- a/sdks/python/apache_beam/runners/portability/prism_runner.py +++ b/sdks/python/apache_beam/runners/portability/prism_runner.py @@ -480,14 +480,13 @@ def _get_executable_path(self) -> str: return self._prepare_executable(local_path, self.BIN_CACHE, ignore_cache) - 
def subprocess_cmd_and_endpoint( - self) -> typing.Tuple[typing.List[typing.Any], str]: + def subprocess_cmd_and_endpoint(self) -> tuple[list[typing.Any], str]: bin_path = self._get_executable_path() job_port, = subprocess_server.pick_port(self._job_port) subprocess_cmd = [bin_path] + self.prism_arguments(job_port) return (subprocess_cmd, f"localhost:{job_port}") - def prism_arguments(self, job_port) -> typing.List[typing.Any]: + def prism_arguments(self, job_port) -> list[typing.Any]: return [ '--job_port', job_port, diff --git a/sdks/python/apache_beam/runners/portability/sdk_container_builder.py b/sdks/python/apache_beam/runners/portability/sdk_container_builder.py index b958995373d7..1e68fc305aaf 100644 --- a/sdks/python/apache_beam/runners/portability/sdk_container_builder.py +++ b/sdks/python/apache_beam/runners/portability/sdk_container_builder.py @@ -35,7 +35,6 @@ import tempfile import time import uuid -from typing import Type from google.protobuf.json_format import MessageToJson @@ -139,7 +138,7 @@ def build_container_image(cls, pipeline_options: PipelineOptions) -> str: return builder._build() @classmethod - def _get_subclass_by_key(cls, key: str) -> Type['SdkContainerImageBuilder']: + def _get_subclass_by_key(cls, key: str) -> type['SdkContainerImageBuilder']: available_builders = [ subclass for subclass in cls.get_all_subclasses() if subclass._builder_key() == key diff --git a/sdks/python/apache_beam/runners/portability/stager.py b/sdks/python/apache_beam/runners/portability/stager.py index 668477ce1461..e862fde4efef 100644 --- a/sdks/python/apache_beam/runners/portability/stager.py +++ b/sdks/python/apache_beam/runners/portability/stager.py @@ -57,9 +57,7 @@ import tempfile from importlib.metadata import distribution from typing import Callable -from typing import List from typing import Optional -from typing import Tuple from urllib.parse import urlparse from packaging import version @@ -138,7 +136,7 @@ def _create_file_pip_requirements_artifact(local_path): @staticmethod def extract_staging_tuple_iter( - artifacts: List[beam_runner_api_pb2.ArtifactInformation]): + artifacts: list[beam_runner_api_pb2.ArtifactInformation]): for artifact in artifacts: if artifact.type_urn == common_urns.artifact_types.FILE.urn: file_payload = beam_runner_api_pb2.ArtifactFilePayload() @@ -162,8 +160,8 @@ def extract_staging_tuple_iter( def create_job_resources( options: PipelineOptions, temp_dir: str, - build_setup_args: Optional[List[str]] = None, - pypi_requirements: Optional[List[str]] = None, + build_setup_args: Optional[list[str]] = None, + pypi_requirements: Optional[list[str]] = None, populate_requirements_cache: Optional[Callable[[str, str, bool], None]] = None, skip_prestaged_dependencies: Optional[bool] = False, @@ -200,7 +198,7 @@ def create_job_resources( while trying to create the resources (e.g., build a setup package). """ - resources: List[beam_runner_api_pb2.ArtifactInformation] = [] + resources: list[beam_runner_api_pb2.ArtifactInformation] = [] setup_options = options.view_as(SetupOptions) use_beam_default_container = options.view_as( @@ -403,7 +401,7 @@ def create_job_resources( def stage_job_resources( self, - resources: List[Tuple[str, str, str]], + resources: list[tuple[str, str, str]], staging_location: Optional[str] = None): """For internal use only; no backwards-compatibility guarantees. 
@@ -437,9 +435,9 @@ def stage_job_resources( def create_and_stage_job_resources( self, options: PipelineOptions, - build_setup_args: Optional[List[str]] = None, + build_setup_args: Optional[list[str]] = None, temp_dir: Optional[str] = None, - pypi_requirements: Optional[List[str]] = None, + pypi_requirements: Optional[list[str]] = None, populate_requirements_cache: Optional[Callable[[str, str, bool], None]] = None, staging_location: Optional[str] = None): @@ -544,7 +542,7 @@ def _is_remote_path(path): @staticmethod def _create_jar_packages( - jar_packages, temp_dir) -> List[beam_runner_api_pb2.ArtifactInformation]: + jar_packages, temp_dir) -> list[beam_runner_api_pb2.ArtifactInformation]: """Creates a list of local jar packages for Java SDK Harness. :param jar_packages: Ordered list of local paths to jar packages to be @@ -557,9 +555,9 @@ def _create_jar_packages( RuntimeError: If files specified are not found or do not have expected name patterns. """ - resources: List[beam_runner_api_pb2.ArtifactInformation] = [] + resources: list[beam_runner_api_pb2.ArtifactInformation] = [] staging_temp_dir = tempfile.mkdtemp(dir=temp_dir) - local_packages: List[str] = [] + local_packages: list[str] = [] for package in jar_packages: if not os.path.basename(package).endswith('.jar'): raise RuntimeError( @@ -595,7 +593,7 @@ def _create_jar_packages( @staticmethod def _create_extra_packages( extra_packages, - temp_dir) -> List[beam_runner_api_pb2.ArtifactInformation]: + temp_dir) -> list[beam_runner_api_pb2.ArtifactInformation]: """Creates a list of local extra packages. Args: @@ -614,9 +612,9 @@ def _create_extra_packages( RuntimeError: If files specified are not found or do not have expected name patterns. """ - resources: List[beam_runner_api_pb2.ArtifactInformation] = [] + resources: list[beam_runner_api_pb2.ArtifactInformation] = [] staging_temp_dir = tempfile.mkdtemp(dir=temp_dir) - local_packages: List[str] = [] + local_packages: list[str] = [] for package in extra_packages: if not (os.path.basename(package).endswith('.tar') or os.path.basename(package).endswith('.tar.gz') or @@ -815,7 +813,7 @@ def _populate_requirements_cache( def _build_setup_package( setup_file: str, temp_dir: str, - build_setup_args: Optional[List[str]] = None) -> str: + build_setup_args: Optional[list[str]] = None) -> str: saved_current_directory = os.getcwd() try: @@ -882,7 +880,7 @@ def _desired_sdk_filename_in_staging_location(sdk_location) -> str: @staticmethod def _create_beam_sdk( sdk_remote_location, - temp_dir) -> List[beam_runner_api_pb2.ArtifactInformation]: + temp_dir) -> list[beam_runner_api_pb2.ArtifactInformation]: """Creates a Beam SDK file with the appropriate version. 
Args: diff --git a/sdks/python/apache_beam/runners/portability/stager_test.py b/sdks/python/apache_beam/runners/portability/stager_test.py index 4ec1c697fbff..3d625fb287ae 100644 --- a/sdks/python/apache_beam/runners/portability/stager_test.py +++ b/sdks/python/apache_beam/runners/portability/stager_test.py @@ -26,7 +26,6 @@ import sys import tempfile import unittest -from typing import List import mock import pytest @@ -80,7 +79,7 @@ def create_temp_file(self, path, contents): def is_remote_path(self, path): return path.startswith('/tmp/remote/') - remote_copied_files: List[str] = [] + remote_copied_files: list[str] = [] def file_copy(self, from_path, to_path): if self.is_remote_path(from_path): diff --git a/sdks/python/apache_beam/runners/sdf_utils.py b/sdks/python/apache_beam/runners/sdf_utils.py index 01573656b6ac..ad1a4fd74038 100644 --- a/sdks/python/apache_beam/runners/sdf_utils.py +++ b/sdks/python/apache_beam/runners/sdf_utils.py @@ -25,7 +25,6 @@ from typing import Any from typing import NamedTuple from typing import Optional -from typing import Tuple from typing import Union from apache_beam.transforms.core import WatermarkEstimatorProvider @@ -117,7 +116,7 @@ def try_split(self, fraction_of_remainder): with self._lock: return self._restriction_tracker.try_split(fraction_of_remainder) - def deferred_status(self) -> Optional[Tuple[Any, Duration]]: + def deferred_status(self) -> Optional[tuple[Any, Duration]]: """Returns deferred work which is produced by ``defer_remainder()``. When there is a self-checkpoint performed, the system needs to fulfill the diff --git a/sdks/python/apache_beam/runners/trivial_runner.py b/sdks/python/apache_beam/runners/trivial_runner.py index 6517a3adc373..6f0402abf55c 100644 --- a/sdks/python/apache_beam/runners/trivial_runner.py +++ b/sdks/python/apache_beam/runners/trivial_runner.py @@ -20,7 +20,6 @@ from typing import Any from typing import Iterable from typing import Iterator -from typing import List from typing import TypeVar from apache_beam import coders @@ -347,10 +346,10 @@ def register_process_bundle_descriptor( self._process_bundle_descriptors[ process_bundle_descriptor.id] = process_bundle_descriptor - def get_pcollection_contents(self, pcoll_id: str) -> List[bytes]: + def get_pcollection_contents(self, pcoll_id: str) -> list[bytes]: return self._pcollections_to_encoded_chunks[pcoll_id] - def set_pcollection_contents(self, pcoll_id: str, chunks: List[bytes]): + def set_pcollection_contents(self, pcoll_id: str, chunks: list[bytes]): self._pcollections_to_encoded_chunks[pcoll_id] = chunks def new_id(self, prefix='') -> str: diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py index faa756d7c5c5..1a852aa19d98 100644 --- a/sdks/python/apache_beam/runners/worker/bundle_processor.py +++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py @@ -39,18 +39,11 @@ from typing import Any from typing import Callable from typing import Container -from typing import DefaultDict -from typing import Dict -from typing import FrozenSet from typing import Iterable from typing import Iterator -from typing import List from typing import Mapping from typing import MutableMapping from typing import Optional -from typing import Set -from typing import Tuple -from typing import Type from typing import TypeVar from typing import Union from typing import cast @@ -108,7 +101,7 @@ Any, beam_runner_api_pb2.PTransform, Union['message.Message', bytes], - Dict[str, 
List[operations.Operation]] + dict[str, list[operations.Operation]] ], operations.Operation] OperationT = TypeVar('OperationT', bound=operations.Operation) @@ -179,7 +172,7 @@ def __init__( self, operation_name: common.NameContext, step_name, - consumers: Mapping[Any, List[operations.Operation]], + consumers: Mapping[Any, list[operations.Operation]], counter_factory: counters.CounterFactory, state_sampler: statesampler.StateSampler, windowed_coder: coders.Coder, @@ -242,8 +235,8 @@ def process_encoded(self, encoded_windowed_values: bytes) -> None: self.output(decoded_value) def monitoring_infos( - self, transform_id: str, tag_to_pcollection_id: Dict[str, str] - ) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]: + self, transform_id: str, tag_to_pcollection_id: dict[str, str] + ) -> dict[frozenset, metrics_pb2.MonitoringInfo]: all_monitoring_infos = super().monitoring_infos( transform_id, tag_to_pcollection_id) read_progress_info = monitoring_infos.int64_counter( @@ -259,7 +252,7 @@ def monitoring_infos( def try_split( # type: ignore[override] self, fraction_of_remainder, total_buffer_size, allowed_split_points ) -> Optional[ - Tuple[ + tuple[ int, Iterable[operations.SdfSplitResultsPrimary], Iterable[operations.SdfSplitResultsResidual], @@ -317,7 +310,7 @@ def is_valid_split_point(index): # try splitting at the current element. if (keep_of_element_remainder < 1 and is_valid_split_point(index) and is_valid_split_point(index + 1)): - split: Optional[Tuple[ + split: Optional[tuple[ Iterable[operations.SdfSplitResultsPrimary], Iterable[operations.SdfSplitResultsResidual]]] = try_split( keep_of_element_remainder) @@ -408,7 +401,7 @@ def __init__( self._element_coder = coder.wrapped_value_coder self._target_window_coder = coder.window_coder # TODO(robertwb): Limit the cache size. 
- self._cache: Dict[BoundedWindow, Any] = {} + self._cache: dict[BoundedWindow, Any] = {} self._use_bulk_read = use_bulk_read def __getitem__(self, window): @@ -609,7 +602,7 @@ def __init__( self._state_key = state_key self._value_coder = value_coder self._cleared = False - self._added_elements: List[Any] = [] + self._added_elements: list[Any] = [] def read(self) -> Iterable[Any]: return _ConcatIterable([] if self._cleared else cast( @@ -647,7 +640,7 @@ def __init__( self._state_key = state_key self._value_coder = value_coder self._cleared = False - self._added_elements: Set[Any] = set() + self._added_elements: set[Any] = set() def _compact_data(self, rewrite=True): accumulator = set( @@ -667,7 +660,7 @@ def _compact_data(self, rewrite=True): return accumulator - def read(self) -> Set[Any]: + def read(self) -> set[Any]: return self._compact_data(rewrite=False) def add(self, value: Any) -> None: @@ -737,7 +730,7 @@ def __len__(self) -> int: assert len(self._sorted_starts) == len(self._sorted_ends) return len(self._sorted_starts) - def __iter__(self) -> Iterator[Tuple[int, int]]: + def __iter__(self) -> Iterator[tuple[int, int]]: return zip(self._sorted_starts, self._sorted_ends) def __str__(self) -> str: @@ -763,7 +756,7 @@ def __init__( self._pending_adds = SortedDict() self._pending_removes = RangeSet() - def add(self, elem: Tuple[timestamp.Timestamp, Any]) -> None: + def add(self, elem: tuple[timestamp.Timestamp, Any]) -> None: assert len(elem) == 2 key_ts, value = elem key = key_ts.micros @@ -772,14 +765,14 @@ def add(self, elem: Tuple[timestamp.Timestamp, Any]) -> None: raise ValueError("key value %d is out of range" % key) self._pending_adds.setdefault(key, []).append(value) - def read(self) -> Iterable[Tuple[timestamp.Timestamp, Any]]: + def read(self) -> Iterable[tuple[timestamp.Timestamp, Any]]: return self.read_range(self.TIMESTAMP_RANGE_MIN, self.TIMESTAMP_RANGE_MAX) def read_range( self, min_timestamp: timestamp.Timestamp, limit_timestamp: timestamp.Timestamp - ) -> Iterable[Tuple[timestamp.Timestamp, Any]]: + ) -> Iterable[tuple[timestamp.Timestamp, Any]]: # convert timestamp to int, as sort keys are stored as int internally. min_key = min_timestamp.micros limit_key = limit_timestamp.micros @@ -946,8 +939,8 @@ def __init__( self._key_coder = key_coder self._window_coder = window_coder # A mapping of {timer_family_id: TimerInfo} - self._timers_info: Dict[str, TimerInfo] = {} - self._all_states: Dict[tuple, FnApiUserRuntimeStateTypes] = {} + self._timers_info: dict[str, TimerInfo] = {} + self._all_states: dict[tuple, FnApiUserRuntimeStateTypes] = {} def add_timer_info(self, timer_family_id: str, timer_info: TimerInfo) -> None: self._timers_info[timer_family_id] = timer_info @@ -1086,7 +1079,7 @@ class BundleProcessor(object): """ A class for processing bundles of elements. """ def __init__( self, - runner_capabilities: FrozenSet[str], + runner_capabilities: frozenset[str], process_bundle_descriptor: beam_fn_api_pb2.ProcessBundleDescriptor, state_handler: sdk_worker.CachingStateHandler, data_channel_factory: data_plane.DataChannelFactory, @@ -1126,7 +1119,7 @@ def __init__( # {(transform_id, timer_family_id): TimerInfo} # The mapping is empty when there is no timer_family_specs in the # ProcessBundleDescriptor. - self.timers_info: Dict[Tuple[str, str], TimerInfo] = {} + self.timers_info: dict[tuple[str, str], TimerInfo] = {} # TODO(robertwb): Figure out the correct prefix to use for output counters # from StateSampler. 
@@ -1184,7 +1177,8 @@ def is_side_input(transform_proto, tag): transform_proto.spec.payload, beam_runner_api_pb2.ParDoPayload).side_inputs - pcoll_consumers: DefaultDict[str, List[str]] = collections.defaultdict(list) + pcoll_consumers: collections.defaultdict[ str, list[str]] = collections.defaultdict(list) for transform_id, transform_proto in descriptor.transforms.items(): for tag, pcoll_id in transform_proto.inputs.items(): if not is_side_input(transform_proto, tag): @@ -1230,9 +1224,9 @@ def reset(self) -> None: def process_bundle( self, instruction_id: str - ) -> Tuple[List[beam_fn_api_pb2.DelayedBundleApplication], bool]: + ) -> tuple[list[beam_fn_api_pb2.DelayedBundleApplication], bool]: - expected_input_ops: List[DataInputOperation] = [] + expected_input_ops: list[DataInputOperation] = [] for op in self.ops.values(): if isinstance(op, DataOutputOperation): @@ -1258,10 +1252,9 @@ def process_bundle( # both data input and timer input. The data input is identified by # transform_id. The timer input is identified by # (transform_id, timer_family_id). - data_channels: DefaultDict[data_plane.DataChannel, - List[Union[str, Tuple[ str, str]]]] = collections.defaultdict(list) + data_channels: collections.defaultdict[ data_plane.DataChannel, list[Union[str, tuple[str, str]]]] = collections.defaultdict(list) # Add expected data inputs for each data channel. input_op_by_transform_id = {} @@ -1410,7 +1403,7 @@ def construct_bundle_application( if output_watermark: proto_output_watermark = proto_utils.from_micros( timestamp_pb2.Timestamp, output_watermark.micros) - output_watermarks: Optional[Dict[str, timestamp_pb2.Timestamp]] = { + output_watermarks: Optional[dict[str, timestamp_pb2.Timestamp]] = { output: proto_output_watermark for output in outputs } @@ -1422,7 +1415,7 @@ def construct_bundle_application( output_watermarks=output_watermarks, element=main_input_coder.get_impl().encode_nested(element)) - def monitoring_infos(self) -> List[metrics_pb2.MonitoringInfo]: + def monitoring_infos(self) -> list[metrics_pb2.MonitoringInfo]: """Returns the list of MonitoringInfos collected while processing this bundle.""" # Construct a new dict first to remove duplicates. all_monitoring_infos_dict = {} @@ -1442,7 +1435,7 @@ def shutdown(self) -> None: @dataclass class ExecutionContext: # Any splits to be processed later.
- delayed_applications: List[Tuple[operations.DoOperation, + delayed_applications: list[tuple[operations.DoOperation, common.SplitResultResidual]] = field( default_factory=list) @@ -1457,7 +1450,7 @@ class BeamTransformFactory(object): """Factory for turning transform_protos into executable operations.""" def __init__( self, - runner_capabilities: FrozenSet[str], + runner_capabilities: frozenset[str], descriptor: beam_fn_api_pb2.ProcessBundleDescriptor, data_channel_factory: data_plane.DataChannelFactory, counter_factory: counters.CounterFactory, @@ -1480,21 +1473,21 @@ def __init__( element_coder_impl)) self.data_sampler = data_sampler - _known_urns: Dict[str, - Tuple[ConstructorFn, - Union[Type[message.Message], Type[bytes], + _known_urns: dict[str, + tuple[ConstructorFn, + Union[type[message.Message], type[bytes], None]]] = {} @classmethod def register_urn( - cls, urn: str, parameter_type: Optional[Type[T]] + cls, urn: str, parameter_type: Optional[type[T]] ) -> Callable[[ Callable[[ BeamTransformFactory, str, beam_runner_api_pb2.PTransform, T, - Dict[str, List[operations.Operation]] + dict[str, list[operations.Operation]] ], operations.Operation] ], @@ -1503,7 +1496,7 @@ def register_urn( str, beam_runner_api_pb2.PTransform, T, - Dict[str, List[operations.Operation]] + dict[str, list[operations.Operation]] ], operations.Operation]]: def wrapper(func): @@ -1514,7 +1507,7 @@ def wrapper(func): def create_operation( self, transform_id: str, - consumers: Dict[str, List[operations.Operation]]) -> operations.Operation: + consumers: dict[str, list[operations.Operation]]) -> operations.Operation: transform_proto = self.descriptor.transforms[transform_id] if not transform_proto.unique_name: _LOGGER.debug("No unique name set for transform %s" % transform_id) @@ -1524,7 +1517,7 @@ def create_operation( transform_proto.spec.payload, parameter_type) return creator(self, transform_id, transform_proto, payload, consumers) - def extract_timers_info(self) -> Dict[Tuple[str, str], TimerInfo]: + def extract_timers_info(self) -> dict[tuple[str, str], TimerInfo]: timers_info = {} for transform_id, transform_proto in self.descriptor.transforms.items(): if transform_proto.spec.urn == common_urns.primitives.PAR_DO.urn: @@ -1563,7 +1556,7 @@ def get_windowed_coder(self, pcoll_id: str) -> WindowedValueCoder: def get_output_coders( self, transform_proto: beam_runner_api_pb2.PTransform - ) -> Dict[str, coders.Coder]: + ) -> dict[str, coders.Coder]: return { tag: self.get_windowed_coder(pcoll_id) for tag, pcoll_id in transform_proto.outputs.items() @@ -1575,7 +1568,7 @@ def get_only_output_coder( def get_input_coders( self, transform_proto: beam_runner_api_pb2.PTransform - ) -> Dict[str, coders.WindowedValueCoder]: + ) -> dict[str, coders.WindowedValueCoder]: return { tag: self.get_windowed_coder(pcoll_id) for tag, pcoll_id in transform_proto.inputs.items() @@ -1598,7 +1591,7 @@ def augment_oldstyle_op( op: OperationT, step_name: str, consumers: Mapping[str, Iterable[operations.Operation]], - tag_list: Optional[List[str]] = None) -> OperationT: + tag_list: Optional[list[str]] = None) -> OperationT: op.step_name = step_name for tag, op_consumers in consumers.items(): for consumer in op_consumers: @@ -1613,7 +1606,7 @@ def create_source_runner( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, grpc_port: beam_fn_api_pb2.RemoteGrpcPort, - consumers: Dict[str, List[operations.Operation]]) -> DataInputOperation: + consumers: dict[str, list[operations.Operation]]) -> DataInputOperation: output_coder = 
factory.get_coder(grpc_port.coder_id) return DataInputOperation( @@ -1634,7 +1627,7 @@ def create_sink_runner( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, grpc_port: beam_fn_api_pb2.RemoteGrpcPort, - consumers: Dict[str, List[operations.Operation]]) -> DataOutputOperation: + consumers: dict[str, list[operations.Operation]]) -> DataOutputOperation: output_coder = factory.get_coder(grpc_port.coder_id) return DataOutputOperation( common.NameContext(transform_proto.unique_name, transform_id), @@ -1653,8 +1646,8 @@ def create_source_java( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter, - consumers: Dict[str, - List[operations.Operation]]) -> operations.ReadOperation: + consumers: dict[str, + list[operations.Operation]]) -> operations.ReadOperation: # The Dataflow runner harness strips the base64 encoding. source = pickler.loads(base64.b64encode(parameter)) spec = operation_specs.WorkerRead( @@ -1677,8 +1670,8 @@ def create_deprecated_read( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter: beam_runner_api_pb2.ReadPayload, - consumers: Dict[str, - List[operations.Operation]]) -> operations.ReadOperation: + consumers: dict[str, + list[operations.Operation]]) -> operations.ReadOperation: source = iobase.BoundedSource.from_runner_api( parameter.source, factory.context) spec = operation_specs.WorkerRead( @@ -1701,7 +1694,7 @@ def create_read_from_impulse_python( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter: beam_runner_api_pb2.ReadPayload, - consumers: Dict[str, List[operations.Operation]] + consumers: dict[str, list[operations.Operation]] ) -> operations.ImpulseReadOperation: return operations.ImpulseReadOperation( common.NameContext(transform_proto.unique_name, transform_id), @@ -1718,7 +1711,7 @@ def create_dofn_javasdk( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, serialized_fn, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_pardo_operation( factory, transform_id, transform_proto, consumers, serialized_fn) @@ -1806,7 +1799,7 @@ def create_process_sized_elements_and_restrictions( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter: beam_runner_api_pb2.ParDoPayload, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_pardo_operation( factory, transform_id, @@ -1852,7 +1845,7 @@ def create_par_do( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter: beam_runner_api_pb2.ParDoPayload, - consumers: Dict[str, List[operations.Operation]]) -> operations.DoOperation: + consumers: dict[str, list[operations.Operation]]) -> operations.DoOperation: return _create_pardo_operation( factory, transform_id, @@ -1985,7 +1978,7 @@ def create_assign_windows( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter: beam_runner_api_pb2.WindowingStrategy, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): class WindowIntoDoFn(beam.DoFn): def __init__(self, windowing): self.windowing = windowing @@ -2016,7 +2009,7 @@ def create_identity_dofn( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, parameter, - consumers: Dict[str, List[operations.Operation]] + consumers: dict[str, list[operations.Operation]] ) -> operations.FlattenOperation: return factory.augment_oldstyle_op( operations.FlattenOperation( 
@@ -2037,8 +2030,8 @@ def create_combine_per_key_precombine( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload: beam_runner_api_pb2.CombinePayload, - consumers: Dict[str, - List[operations.Operation]]) -> operations.PGBKCVOperation: + consumers: dict[str, + list[operations.Operation]]) -> operations.PGBKCVOperation: serialized_combine_fn = pickler.dumps(( beam.CombineFn.from_runner_api(payload.combine_fn, factory.context), [], {})) @@ -2063,7 +2056,7 @@ def create_combbine_per_key_merge_accumulators( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload: beam_runner_api_pb2.CombinePayload, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_combine_phase_operation( factory, transform_id, transform_proto, payload, consumers, 'merge') @@ -2076,7 +2069,7 @@ def create_combine_per_key_extract_outputs( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload: beam_runner_api_pb2.CombinePayload, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_combine_phase_operation( factory, transform_id, transform_proto, payload, consumers, 'extract') @@ -2089,7 +2082,7 @@ def create_combine_per_key_convert_to_accumulators( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload: beam_runner_api_pb2.CombinePayload, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_combine_phase_operation( factory, transform_id, transform_proto, payload, consumers, 'convert') @@ -2102,7 +2095,7 @@ def create_combine_grouped_values( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload: beam_runner_api_pb2.CombinePayload, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): return _create_combine_phase_operation( factory, transform_id, transform_proto, payload, consumers, 'all') @@ -2132,7 +2125,7 @@ def create_flatten( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, payload, - consumers: Dict[str, List[operations.Operation]] + consumers: dict[str, list[operations.Operation]] ) -> operations.FlattenOperation: return factory.augment_oldstyle_op( operations.FlattenOperation( @@ -2152,7 +2145,7 @@ def create_map_windows( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, mapping_fn_spec: beam_runner_api_pb2.FunctionSpec, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): assert mapping_fn_spec.urn == python_urns.PICKLED_WINDOW_MAPPING_FN window_mapping_fn = pickler.loads(mapping_fn_spec.payload) @@ -2172,7 +2165,7 @@ def create_merge_windows( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, mapping_fn_spec: beam_runner_api_pb2.FunctionSpec, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): assert mapping_fn_spec.urn == python_urns.PICKLED_WINDOWFN window_fn = pickler.loads(mapping_fn_spec.payload) @@ -2180,10 +2173,10 @@ class MergeWindows(beam.DoFn): def process(self, element): nonce, windows = element - original_windows: Set[window.BoundedWindow] = set(windows) + original_windows: set[window.BoundedWindow] = set(windows) merged_windows: MutableMapping[ window.BoundedWindow, - Set[window.BoundedWindow]] = collections.defaultdict( + set[window.BoundedWindow]] = collections.defaultdict( set) # noqa: 
F821 class RecordingMergeContext(window.WindowFn.MergeContext): @@ -2213,7 +2206,7 @@ def create_to_string_fn( transform_id: str, transform_proto: beam_runner_api_pb2.PTransform, mapping_fn_spec: beam_runner_api_pb2.FunctionSpec, - consumers: Dict[str, List[operations.Operation]]): + consumers: dict[str, list[operations.Operation]]): class ToString(beam.DoFn): def process(self, element): key, value = element diff --git a/sdks/python/apache_beam/runners/worker/data_sampler.py b/sdks/python/apache_beam/runners/worker/data_sampler.py index c95c88f2dbdf..a8bbe2bad7ed 100644 --- a/sdks/python/apache_beam/runners/worker/data_sampler.py +++ b/sdks/python/apache_beam/runners/worker/data_sampler.py @@ -29,12 +29,8 @@ from dataclasses import dataclass from threading import Timer from typing import Any -from typing import Deque -from typing import Dict from typing import Iterable -from typing import List from typing import Optional -from typing import Tuple from typing import Union from apache_beam.coders.coder_impl import CoderImpl @@ -116,14 +112,15 @@ def __init__( coder: Coder, max_samples: int = 10, sample_every_sec: float = 5) -> None: - self._samples: Deque[Any] = collections.deque(maxlen=max_samples) + self._samples: collections.deque[Any] = collections.deque( + maxlen=max_samples) self._samples_lock: threading.Lock = threading.Lock() self._coder_impl: CoderImpl = coder.get_impl() self._sample_timer = SampleTimer(sample_every_sec, self) self.element_sampler = ElementSampler() self.element_sampler.has_element = False - self._exceptions: Deque[Tuple[Any, ExceptionMetadata]] = collections.deque( - maxlen=max_samples) + self._exceptions: collections.deque[tuple[ + Any, ExceptionMetadata]] = collections.deque(maxlen=max_samples) # For testing, it's easier to disable the Timer and manually sample. if sample_every_sec > 0: @@ -143,7 +140,7 @@ def remove_windowed_value(self, el: Union[WindowedValue, Any]) -> Any: el = el.value return el - def flush(self, clear: bool = True) -> List[beam_fn_api_pb2.SampledElement]: + def flush(self, clear: bool = True) -> list[beam_fn_api_pb2.SampledElement]: """Returns all samples and optionally clears buffer if clear is True.""" with self._samples_lock: # TODO(rohdesamuel): There can be duplicates between the exceptions and @@ -226,13 +223,13 @@ def __init__( sample_only_exceptions: bool = False, clock=None) -> None: # Key is PCollection id. It is guarded by the _samplers_lock. - self._samplers: Dict[str, OutputSampler] = {} + self._samplers: dict[str, OutputSampler] = {} # Bundles are processed in parallel, so new samplers may be added when the # runner queries for samples. self._samplers_lock: threading.Lock = threading.Lock() self._max_samples = max_samples self._sample_every_sec = 0.0 if sample_only_exceptions else sample_every_sec - self._samplers_by_output: Dict[str, List[OutputSampler]] = {} + self._samplers_by_output: dict[str, list[OutputSampler]] = {} self._clock = clock _ENABLE_DATA_SAMPLING = 'enable_data_sampling' @@ -286,7 +283,7 @@ def initialize_samplers( self, transform_id: str, descriptor: beam_fn_api_pb2.ProcessBundleDescriptor, - coder_factory) -> List[OutputSampler]: + coder_factory) -> list[OutputSampler]: """Creates the OutputSamplers for the given PTransform. This initializes the samplers only once per PCollection Id.
Note that an @@ -347,7 +344,7 @@ def samples( return ret def wait_for_samples( - self, pcollection_ids: List[str]) -> beam_fn_api_pb2.SampleDataResponse: + self, pcollection_ids: list[str]) -> beam_fn_api_pb2.SampleDataResponse: """Waits for samples to exist for the given PCollections (only testing).""" now = time.time() end = now + 30 diff --git a/sdks/python/apache_beam/runners/worker/data_sampler_test.py b/sdks/python/apache_beam/runners/worker/data_sampler_test.py index 47b6cca880d3..d54d7690e247 100644 --- a/sdks/python/apache_beam/runners/worker/data_sampler_test.py +++ b/sdks/python/apache_beam/runners/worker/data_sampler_test.py @@ -22,7 +22,6 @@ import traceback import unittest from typing import Any -from typing import List from typing import Optional from apache_beam.coders import FastPrimitivesCoder @@ -42,8 +41,8 @@ class DataSamplerTest(unittest.TestCase): def make_test_descriptor( self, - outputs: Optional[List[str]] = None, - transforms: Optional[List[str]] = None + outputs: Optional[list[str]] = None, + transforms: Optional[list[str]] = None ) -> beam_fn_api_pb2.ProcessBundleDescriptor: outputs = outputs or [MAIN_PCOLLECTION_ID] transforms = transforms or [MAIN_TRANSFORM_ID] diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 69815acc7194..c9848a6460c2 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -29,7 +29,6 @@ import traceback from typing import Iterable from typing import Iterator -from typing import List from typing import Union from typing import cast @@ -184,7 +183,7 @@ def _write_log_entries(self) -> Iterator[beam_fn_api_pb2.LogEntry.List]: # typing: log_entries was initialized as List[Union[..., Sentinel]], # but now that we've popped the sentinel out (above) we can safely cast yield beam_fn_api_pb2.LogEntry.List( - log_entries=cast(List[beam_fn_api_pb2.LogEntry], log_entries)) + log_entries=cast(list[beam_fn_api_pb2.LogEntry], log_entries)) def _read_log_control_messages(self) -> None: # Only reconnect when we are alive. 
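The data_sampler.py and log_handler.py hunks above rely on PEP 585 (Python 3.9+), under which builtin and standard-library containers are directly subscriptable, making typing.Deque, typing.List, and typing.Tuple redundant. A minimal sketch of the pattern, using illustrative names rather than the Beam ones:

import collections
from typing import Any, cast

# PEP 585: stdlib containers are themselves generic at runtime.
samples: collections.deque[Any] = collections.deque(maxlen=10)
exceptions: collections.deque[tuple[Any, str]] = collections.deque(maxlen=10)

def flush(buffer: collections.deque[Any]) -> list[Any]:
  # cast() accepts the builtin generics exactly as it did the typing aliases.
  drained = cast(list[Any], list(buffer))
  buffer.clear()
  return drained
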
diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py index 06e2508fb7d2..5fd326d6cd4d 100644 --- a/sdks/python/apache_beam/runners/worker/logger.py +++ b/sdks/python/apache_beam/runners/worker/logger.py @@ -20,6 +20,7 @@ # pytype: skip-file # mypy: disallow-untyped-defs +# ruff: noqa: UP006 import contextlib import json import logging diff --git a/sdks/python/apache_beam/runners/worker/opcounters.py b/sdks/python/apache_beam/runners/worker/opcounters.py index f5883cfbf2ef..66a369ba3bbb 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters.py +++ b/sdks/python/apache_beam/runners/worker/opcounters.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import math import random import sys diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index f96eb3728717..bfa1c0f3d47c 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -20,6 +20,7 @@ # pytype: skip-file # pylint: disable=super-with-arguments +# ruff: noqa: UP006 import collections import logging import threading diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index 6060ff8d54a8..e1f17296057a 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -36,15 +36,11 @@ from typing import Any from typing import Callable from typing import DefaultDict -from typing import Dict -from typing import FrozenSet from typing import Generic from typing import Iterable from typing import Iterator -from typing import List from typing import MutableMapping from typing import Optional -from typing import Tuple from typing import TypeVar from typing import Union @@ -58,7 +54,6 @@ from apache_beam.portability.api import metrics_pb2 from apache_beam.runners.worker import bundle_processor from apache_beam.runners.worker import data_plane -from apache_beam.runners.worker import data_sampler from apache_beam.runners.worker import statesampler from apache_beam.runners.worker.channel_factory import GRPCChannelFactory from apache_beam.runners.worker.data_plane import PeriodicThread @@ -71,8 +66,7 @@ from apache_beam.version import __version__ as beam_version if TYPE_CHECKING: - from apache_beam.portability.api import endpoints_pb2 - from apache_beam.utils.profiler import Profile + pass T = TypeVar('T') _KT = TypeVar('_KT') @@ -1461,7 +1455,7 @@ def set(self, value): raise NotImplementedError() -class KeyedDefaultDict(DefaultDict[_KT, _VT]): +class KeyedDefaultDict(collections.defaultdict[_KT, _VT]): if TYPE_CHECKING: # we promise to only use a subset of what DefaultDict can do def __init__(self, default_factory): diff --git a/sdks/python/apache_beam/runners/worker/statecache.py b/sdks/python/apache_beam/runners/worker/statecache.py index d4e61cc9297f..33e1c55deeff 100644 --- a/sdks/python/apache_beam/runners/worker/statecache.py +++ b/sdks/python/apache_beam/runners/worker/statecache.py @@ -29,8 +29,6 @@ import weakref from typing import Any from typing import Callable -from typing import List -from typing import Tuple from typing import Union import objsize @@ -77,12 +75,12 @@ class CacheAware(object): def __init__(self) -> None: pass - def get_referents_for_cache(self) -> List[Any]: + def get_referents_for_cache(self) -> list[Any]: """Returns the list of objects accounted during cache measurement.""" raise 
NotImplementedError() -def _safe_isinstance(obj: Any, type: Union[type, Tuple[type, ...]]) -> bool: +def _safe_isinstance(obj: Any, type: Union[type, tuple[type, ...]]) -> bool: """ Return whether an object is an instance of a class or of a subclass thereof. See `isinstance()` for more information. @@ -126,7 +124,7 @@ def _size_func(obj: Any) -> int: _size_func.last_log_time = 0 # type: ignore -def _get_referents_func(*objs: List[Any]) -> List[Any]: +def _get_referents_func(*objs: list[Any]) -> list[Any]: """Returns the list of objects accounted during cache measurement. Users can inherit CacheAware to override which referents should be diff --git a/sdks/python/apache_beam/runners/worker/statesampler.py b/sdks/python/apache_beam/runners/worker/statesampler.py index b9c75f4de93d..22987b3f9c34 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler.py +++ b/sdks/python/apache_beam/runners/worker/statesampler.py @@ -20,7 +20,6 @@ import contextlib import threading from typing import TYPE_CHECKING -from typing import Dict from typing import NamedTuple from typing import Optional from typing import Union @@ -96,7 +95,7 @@ def __init__( sampling_period_ms=DEFAULT_SAMPLING_PERIOD_MS): self._prefix = prefix self._counter_factory = counter_factory - self._states_by_name: Dict[CounterName, statesampler_impl.ScopedState] = {} + self._states_by_name: dict[CounterName, statesampler_impl.ScopedState] = {} self.sampling_period_ms = sampling_period_ms self.tracked_thread: Optional[threading.Thread] = None self.finished = False diff --git a/sdks/python/apache_beam/runners/worker/worker_pool_main.py b/sdks/python/apache_beam/runners/worker/worker_pool_main.py index 307261c2d3c3..425a9fc57752 100644 --- a/sdks/python/apache_beam/runners/worker/worker_pool_main.py +++ b/sdks/python/apache_beam/runners/worker/worker_pool_main.py @@ -37,9 +37,7 @@ import threading import time import traceback -from typing import Dict from typing import Optional -from typing import Tuple import grpc @@ -83,7 +81,7 @@ def __init__( self._container_executable = container_executable self._state_cache_size = state_cache_size self._data_buffer_time_limit_ms = data_buffer_time_limit_ms - self._worker_processes: Dict[str, subprocess.Popen] = {} + self._worker_processes: dict[str, subprocess.Popen] = {} @classmethod def start( @@ -92,7 +90,7 @@ def start( port=0, state_cache_size=0, data_buffer_time_limit_ms=-1, - container_executable: Optional[str] = None) -> Tuple[str, grpc.Server]: + container_executable: Optional[str] = None) -> tuple[str, grpc.Server]: options = [("grpc.http2.max_pings_without_data", 0), ("grpc.http2.max_ping_strikes", 0)] worker_server = grpc.server( diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py index cbbb9e5d3a2e..83f8aa3bfb36 100644 --- a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py +++ b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py @@ -17,9 +17,7 @@ import json import logging import os -from typing import List from typing import Optional -from typing import Tuple import requests @@ -67,8 +65,8 @@ def create_issue( title: str, description: str, - labels: Optional[List[str]] = None, -) -> Tuple[int, str]: + labels: Optional[list[str]] = None, +) -> tuple[int, str]: """ Create an issue with title, description with a label. 
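In the create_issue signature just above, only the generic spellings change: Optional[list[str]] for labels and tuple[int, str] for the returned issue number and URL. A hedged stand-in showing just that shape (the body is a placeholder, not the real GitHub REST call):

from typing import Optional

def create_issue_stub(
    title: str,
    description: str,
    labels: Optional[list[str]] = None) -> tuple[int, str]:
  # Placeholder body: the real helper posts to the GitHub API and returns
  # the created issue's number and URL.
  del title, description, labels  # unused in this sketch
  return 1, 'https://example.invalid/issues/1'
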
@@ -99,7 +97,7 @@ def create_issue( def comment_on_issue(issue_number: int, - comment_description: str) -> Tuple[bool, str]: + comment_description: str) -> tuple[bool, str]: """ This method looks for an issue with provided issue_number. If an open issue is found, comment on the open issue with provided description else @@ -210,9 +208,9 @@ def get_issue_description( def report_change_point_on_issues( title: str, description: str, - labels: Optional[List[str]] = None, + labels: Optional[list[str]] = None, existing_issue_number: Optional[int] = None, -) -> Tuple[int, str]: +) -> tuple[int, str]: """ Comments the description on the existing issue (if provided and still open), or creates a new issue. diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py index 829f93fc1a35..118745a41b62 100644 --- a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py +++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py @@ -26,7 +26,6 @@ from datetime import datetime from datetime import timezone from typing import Any -from typing import Dict import pandas as pd @@ -46,7 +45,7 @@ def get_test_config_container( - params: Dict[str, Any], + params: dict[str, Any], test_id: str, metric_name: str, ) -> TestConfigContainer: @@ -69,7 +68,7 @@ def get_test_config_container( def get_change_point_config( - params: Dict[str, Any], + params: dict[str, Any], ) -> ChangePointConfig: """ Args: @@ -242,7 +241,7 @@ def run( defined in the config file. """ - tests_config: Dict[str, Dict[str, Any]] = read_test_config(config_file_path) + tests_config: dict[str, dict[str, Any]] = read_test_config(config_file_path) for test_id, params in tests_config.items(): # single test config can have multiple metrics so we need to diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py index 62a0ec676499..2d91e201d428 100644 --- a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py +++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py @@ -19,10 +19,7 @@ from dataclasses import asdict from dataclasses import dataclass from statistics import median -from typing import Dict -from typing import List from typing import Optional -from typing import Tuple from typing import Union import pandas as pd @@ -80,7 +77,7 @@ class TestConfigContainer: test_id: str # unique id for each test config. test_description: str test_name: Optional[str] = None - labels: Optional[List[str]] = None + labels: Optional[list[str]] = None @dataclass @@ -92,8 +89,8 @@ class MetricContainer: timestamps: List of pandas timestamps corresponding to the metric values. 
""" - values: List[Union[int, float]] - timestamps: List[pd.Timestamp] + values: list[Union[int, float]] + timestamps: list[pd.Timestamp] def sort_by_timestamp(self, in_place=True): """ @@ -138,9 +135,9 @@ def get_existing_issues_data(table_name: str) -> Optional[pd.DataFrame]: def is_sibling_change_point( - previous_change_point_timestamps: List[pd.Timestamp], + previous_change_point_timestamps: list[pd.Timestamp], change_point_index: int, - timestamps: List[pd.Timestamp], + timestamps: list[pd.Timestamp], min_runs_between_change_points: int, test_id: str, ) -> bool: @@ -181,7 +178,7 @@ def is_sibling_change_point( return True -def read_test_config(config_file_path: str) -> Dict: +def read_test_config(config_file_path: str) -> dict: """ Reads the config file in which the data required to run the change point analysis is specified. @@ -195,12 +192,12 @@ def validate_config(keys): return constants._PERF_TEST_KEYS.issubset(keys) -def find_change_points(metric_values: List[Union[float, int]]): +def find_change_points(metric_values: list[Union[float, int]]): return e_divisive(metric_values) def find_latest_change_point_index( - metric_values: List[Union[float, int]], + metric_values: list[Union[float, int]], median_abs_deviation_threshold: int = 2): """ Args: @@ -262,7 +259,7 @@ def create_performance_alert( metric_container: MetricContainer, change_point_index: int, existing_issue_number: Optional[int], -) -> Tuple[int, str]: +) -> tuple[int, str]: """ Creates performance alert on GitHub issues and returns GitHub issue number and issue URL. @@ -293,8 +290,8 @@ def create_performance_alert( def filter_change_points_by_median_threshold( - data: List[Union[int, float]], - change_points: List[int], + data: list[Union[int, float]], + change_points: list[int], threshold: float = 0.05, median_abs_deviation_threshold: int = 2, ): diff --git a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py index f390c8c37001..df3684514f1f 100644 --- a/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py +++ b/sdks/python/apache_beam/testing/benchmarks/nexmark/queries/query3.py @@ -108,11 +108,11 @@ def __init__(self, max_auction_wait_time): def process( # type: ignore self, - element: typing.Tuple[ + element: tuple[ str, - typing.Dict[str, - typing.Union[typing.List[nexmark_model.Auction], - typing.List[nexmark_model.Person]]]], + dict[str, + typing.Union[list[nexmark_model.Auction], + list[nexmark_model.Person]]]], auction_state=beam.DoFn.StateParam(auction_spec), person_state=beam.DoFn.StateParam(person_spec), person_timer=beam.DoFn.TimerParam(person_timer_spec)): diff --git a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py index 33dfeeddba4f..6503cc9f273c 100644 --- a/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py +++ b/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py @@ -34,8 +34,6 @@ import time import uuid from typing import Any -from typing import Dict -from typing import List from typing import Mapping from typing import Optional from typing import Union @@ -212,7 +210,7 @@ def __init__( filters: MetricFilter to query only filtered metrics """ self._namespace = namespace - self.publishers: List[MetricsPublisher] = [] + self.publishers: list[MetricsPublisher] = [] # publish to console output self.publishers.append(ConsoleMetricsPublisher()) @@ -275,8 +273,8 @@ def 
publish_metrics( for publisher in self.publishers: publisher.publish(insert_dicts) - def _add_job_id_to_metrics(self, metrics: List[Dict[str, Any]], - job_id) -> List[Dict[str, Any]]: + def _add_job_id_to_metrics(self, metrics: list[dict[str, Any]], + job_id) -> list[dict[str, Any]]: for metric in metrics: metric[JOB_ID_LABEL] = job_id return metrics @@ -547,7 +545,7 @@ def __init__(self, options: InfluxDBMetricsPublisherOptions): self.options = options def publish( - self, results: List[Mapping[str, Union[float, str, int]]]) -> None: + self, results: list[Mapping[str, Union[float, str, int]]]) -> None: url = '{}/write'.format(self.options.hostname) payload = self._build_payload(results) query_str = {'db': self.options.db_name, 'precision': 's'} @@ -569,7 +567,7 @@ def publish( (response.status_code, content['error'])) def _build_payload( - self, results: List[Mapping[str, Union[float, str, int]]]) -> str: + self, results: list[Mapping[str, Union[float, str, int]]]) -> str: def build_kv(mapping, key): return '{}={}'.format(key, mapping[key]) diff --git a/sdks/python/apache_beam/testing/load_tests/sideinput_test.py b/sdks/python/apache_beam/testing/load_tests/sideinput_test.py index 3b5dfdf38cd9..4ccc0f09c4ed 100644 --- a/sdks/python/apache_beam/testing/load_tests/sideinput_test.py +++ b/sdks/python/apache_beam/testing/load_tests/sideinput_test.py @@ -58,9 +58,7 @@ import logging from typing import Any -from typing import Dict from typing import Iterable -from typing import Tuple from typing import Union import apache_beam as beam @@ -112,7 +110,7 @@ def __init__(self, first_n: int): self._first_n = first_n def process( - self, element: Any, side_input: Iterable[Tuple[bytes, + self, element: Any, side_input: Iterable[tuple[bytes, bytes]]) -> None: i = 0 it = iter(side_input) @@ -130,7 +128,7 @@ def __init__(self, first_n: int): self._first_n = first_n def process( - self, element: Any, dict_side_input: Dict[bytes, bytes]) -> None: + self, element: Any, dict_side_input: dict[bytes, bytes]) -> None: i = 0 for key in dict_side_input: if i == self._first_n: @@ -156,7 +154,7 @@ def __init__( self.key_size = key_size self.value_size = value_size - def process(self, element: Any) -> Iterable[Dict[str, Union[int, str]]]: + def process(self, element: Any) -> Iterable[dict[str, Union[int, str]]]: yield { 'num_records': self.elements_per_record, 'key_size': self.key_size, diff --git a/sdks/python/apache_beam/testing/synthetic_pipeline.py b/sdks/python/apache_beam/testing/synthetic_pipeline.py index 0ccbfbfbcc18..efc426ed0ff4 100644 --- a/sdks/python/apache_beam/testing/synthetic_pipeline.py +++ b/sdks/python/apache_beam/testing/synthetic_pipeline.py @@ -42,7 +42,6 @@ import time from random import Random from typing import Optional -from typing import Tuple import apache_beam as beam from apache_beam import pvalue @@ -911,7 +910,7 @@ def __init__(self, input_options, num_keys=100): self.value_size = input_options['value_size'] self.num_keys = num_keys - @typehints.with_output_types(Tuple[bytes, bytes]) + @typehints.with_output_types(tuple[bytes, bytes]) class GenerateKeys(beam.DoFn): def __init__(self, num_keys, key_size): self.num_keys = num_keys diff --git a/sdks/python/apache_beam/testing/util.py b/sdks/python/apache_beam/testing/util.py index 5a7c36fa4458..cfe8221b4aa2 100644 --- a/sdks/python/apache_beam/testing/util.py +++ b/sdks/python/apache_beam/testing/util.py @@ -25,7 +25,6 @@ import tempfile from typing import Any from typing import Iterable -from typing import List from typing import 
NamedTuple from apache_beam import pvalue @@ -67,7 +66,7 @@ class BeamAssertException(Exception): class TestWindowedValue(NamedTuple): value: Any timestamp: Any - windows: List + windows: list pane_info: PaneInfo = PANE_INFO_UNKNOWN diff --git a/sdks/python/apache_beam/tools/runtime_type_check_microbenchmark.py b/sdks/python/apache_beam/tools/runtime_type_check_microbenchmark.py index 6ba01b9a5172..07be679fb716 100644 --- a/sdks/python/apache_beam/tools/runtime_type_check_microbenchmark.py +++ b/sdks/python/apache_beam/tools/runtime_type_check_microbenchmark.py @@ -31,7 +31,6 @@ from collections import defaultdict from time import time from typing import Iterable -from typing import Tuple from typing import Union import apache_beam as beam @@ -39,27 +38,27 @@ from apache_beam.tools import utils -@beam.typehints.with_input_types(Tuple[int, ...]) +@beam.typehints.with_input_types(tuple[int, ...]) class SimpleInput(beam.DoFn): def process(self, element, *args, **kwargs): yield element -@beam.typehints.with_output_types(Tuple[int, ...]) +@beam.typehints.with_output_types(tuple[int, ...]) class SimpleOutput(beam.DoFn): def process(self, element, *args, **kwargs): yield element @beam.typehints.with_input_types( - Tuple[int, str, Tuple[float, ...], Iterable[int], Union[str, int]]) + tuple[int, str, tuple[float, ...], Iterable[int], Union[str, int]]) class NestedInput(beam.DoFn): def process(self, element, *args, **kwargs): yield element @beam.typehints.with_output_types( - Tuple[int, str, Tuple[float, ...], Iterable[int], Union[str, int]]) + tuple[int, str, tuple[float, ...], Iterable[int], Union[str, int]]) class NestedOutput(beam.DoFn): def process(self, element, *args, **kwargs): yield element diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index 35fa0e19ebd8..55a2cf40b64a 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -3441,8 +3441,8 @@ def StripNonce(nonce_key_value): | CombinePerKey(PostCombineFn())) -@typehints.with_input_types(typing.Tuple[K, V]) -@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, typing.Iterable[V]]) class GroupByKey(PTransform): """A group by key transform. diff --git a/sdks/python/apache_beam/transforms/cy_combiners.py b/sdks/python/apache_beam/transforms/cy_combiners.py index b5cc7493a29a..6736ccdb3d66 100644 --- a/sdks/python/apache_beam/transforms/cy_combiners.py +++ b/sdks/python/apache_beam/transforms/cy_combiners.py @@ -22,6 +22,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import operator from apache_beam.transforms import core diff --git a/sdks/python/apache_beam/transforms/deduplicate.py b/sdks/python/apache_beam/transforms/deduplicate.py index 916b071fdf02..5ecdca0803f9 100644 --- a/sdks/python/apache_beam/transforms/deduplicate.py +++ b/sdks/python/apache_beam/transforms/deduplicate.py @@ -38,8 +38,8 @@ V = typing.TypeVar('V') -@typehints.with_input_types(typing.Tuple[K, V]) -@typehints.with_output_types(typing.Tuple[K, V]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, V]) class DeduplicatePerKey(ptransform.PTransform): """ A PTransform which deduplicates pair over a time domain and threshold. 
Values in different windows will NOT be considered duplicates of diff --git a/sdks/python/apache_beam/transforms/enrichment_handlers/cloudsql.py b/sdks/python/apache_beam/transforms/enrichment_handlers/cloudsql.py index d2ddd598209e..85ff530b1baf 100644 --- a/sdks/python/apache_beam/transforms/enrichment_handlers/cloudsql.py +++ b/sdks/python/apache_beam/transforms/enrichment_handlers/cloudsql.py @@ -23,8 +23,6 @@ from dataclasses import field from enum import Enum from typing import Any -from typing import Dict -from typing import List from typing import Optional from typing import Union @@ -59,7 +57,7 @@ class TableFieldsQueryConfig: """Configuration for using table name, where clause, and field names.""" table_id: str where_clause_template: str - where_clause_fields: List[str] + where_clause_fields: list[str] def __post_init__(self): if not self.table_id or not self.where_clause_template: @@ -144,8 +142,8 @@ class CloudSQLConnectionConfig(ConnectionConfig): password: str = field(default_factory=str) db_id: str = field(default_factory=str) refresh_strategy: RefreshStrategy = RefreshStrategy.LAZY - connector_kwargs: Dict[str, Any] = field(default_factory=dict) - connect_kwargs: Dict[str, Any] = field(default_factory=dict) + connector_kwargs: dict[str, Any] = field(default_factory=dict) + connect_kwargs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if not self.instance_connection_uri: @@ -191,7 +189,7 @@ class ExternalSQLDBConnectionConfig(ConnectionConfig): user: str = field(default_factory=str) password: str = field(default_factory=str) db_id: str = field(default_factory=str) - connect_kwargs: Dict[str, Any] = field(default_factory=dict) + connect_kwargs: dict[str, Any] = field(default_factory=dict) def __post_init__(self): if not self.host: @@ -338,7 +336,7 @@ def __call__( def _process_single_request(self, request: beam.Row): """Process a single request and return with its response.""" - response: Union[List[Dict[str, Any]], Dict[str, Any]] + response: Union[list[dict[str, Any]], dict[str, Any]] if isinstance(self._query_config, CustomQueryConfig): query = self._query_config.query_fn(request) response = self._execute_query(query, is_batch=False) @@ -369,7 +367,7 @@ def _process_batch_request(self, requests: list[beam.Row]): param_dict = self._build_parameters_dict(requests, batch_size) # Execute the parameterized query with validated parameters. - result: Union[List[Dict[str, Any]], Dict[str, Any]] = self._execute_query( + result: Union[list[dict[str, Any]], dict[str, Any]] = self._execute_query( raw_query, params=param_dict, is_batch=True) for response in result: response_row = beam.Row(**response) # type: ignore[arg-type] @@ -382,7 +380,7 @@ def _execute_query( self, query: str, params: Optional[dict] = None, - is_batch: bool = False) -> Union[List[Dict[str, Any]], Dict[str, Any]]: + is_batch: bool = False) -> Union[list[dict[str, Any]], dict[str, Any]]: connection = None try: connection = self._engine.connect() @@ -393,7 +391,7 @@ def _execute_query( else: result = connection.execute(text(query)) # Materialize results while transaction is active. 
- data: Union[List[Dict[str, Any]], Dict[str, Any]] + data: Union[list[dict[str, Any]], dict[str, Any]] if is_batch: data = [row._asdict() for row in result] else: diff --git a/sdks/python/apache_beam/transforms/external_test.py b/sdks/python/apache_beam/transforms/external_test.py index 5f2ffd34c3bd..137c92861ed7 100644 --- a/sdks/python/apache_beam/transforms/external_test.py +++ b/sdks/python/apache_beam/transforms/external_test.py @@ -131,7 +131,7 @@ def get_payload_from_typing_hints(self, values): ('integer_example', int), ('boolean', bool), ('string_example', str), - ('list_of_strings', typing.List[str]), + ('list_of_strings', list[str]), ('mapping', typing.Mapping[str, float]), ('optional_integer', typing.Optional[int]), ]) @@ -175,8 +175,7 @@ def test_implicit_payload_builder_with_bytes(self): # Verify we have not modified a cached type (BEAM-10766) # TODO(BEAM-7372): Remove when bytes coercion code is removed. - self.assertEqual( - typehints.List[bytes], convert_to_beam_type(typing.List[bytes])) + self.assertEqual(typehints.List[bytes], convert_to_beam_type(list[bytes])) class ExternalTransformTest(unittest.TestCase): @@ -417,7 +416,7 @@ def __init__( integer_example: int, boolean: bool, string_example: str, - list_of_strings: typing.List[str], + list_of_strings: list[str], mapping: typing.Mapping[str, float], optional_integer: typing.Optional[int] = None, expansion_service=None): @@ -474,7 +473,7 @@ class DataclassTransform(beam.ExternalTransform): integer_example: int boolean: bool string_example: str - list_of_strings: typing.List[str] + list_of_strings: list[str] mapping: typing.Mapping[str, float] = dataclasses.field(default=dict) optional_integer: typing.Optional[int] = None expansion_service: dataclasses.InitVar[typing.Optional[str]] = None diff --git a/sdks/python/apache_beam/transforms/external_transform_provider_it_test.py b/sdks/python/apache_beam/transforms/external_transform_provider_it_test.py index e86beab563bb..624ed81d7e86 100644 --- a/sdks/python/apache_beam/transforms/external_transform_provider_it_test.py +++ b/sdks/python/apache_beam/transforms/external_transform_provider_it_test.py @@ -195,18 +195,18 @@ def test_script_fails_with_invalid_destinations(self): def test_pretty_types(self): types = [ - typing.Optional[typing.List[str]], + typing.Optional[list[str]], numpy.int16, str, - typing.Dict[str, numpy.float64], - typing.Optional[typing.Dict[str, typing.List[numpy.int64]]], - typing.Dict[int, typing.Optional[str]] + dict[str, numpy.float64], + typing.Optional[dict[str, list[numpy.int64]]], + dict[int, typing.Optional[str]] ] - expected_type_names = [('List[str]', True), ('int16', False), - ('str', False), ('Dict[str, float64]', False), - ('Dict[str, List[int64]]', True), - ('Dict[int, Optional[str]]', False)] + expected_type_names = [('list[str]', True), ('int16', False), + ('str', False), ('dict[str, float64]', False), + ('dict[str, list[int64]]', True), + ('dict[int, Optional[str]]', False)] for i in range(len(types)): self.assertEqual( @@ -248,7 +248,7 @@ def get_module(self, dest): return importlib.import_module(module) def write_wrappers_to_destinations_and_validate( - self, destinations: typing.List[str]): + self, destinations: list[str]): """ Generate wrappers from the config path and validate all destinations are included. 
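The golden strings in test_pretty_types above change only because a PEP 585 generic reprs with the builtin's lowercase name, while the typing alias kept the capitalized one; the types are otherwise interchangeable. For instance:

import typing

print(typing.List[str])                      # typing.List[str]
print(list[str])                             # list[str]
print(typing.Optional[dict[str, int]])       # typing.Optional[dict[str, int]]
print(typing.get_origin(list[str]) is list)  # True
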
diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py index 8c2acefccdb3..21691eebf3ae 100644 --- a/sdks/python/apache_beam/transforms/ptransform_test.py +++ b/sdks/python/apache_beam/transforms/ptransform_test.py @@ -1570,7 +1570,7 @@ def process(self, element, num): def test_pardo_does_not_type_check_using_type_hint_decorators(self): @with_input_types(a=int) - @with_output_types(typing.List[str]) + @with_output_types(list[str]) def int_to_str(a): return [str(a)] @@ -1585,7 +1585,7 @@ def int_to_str(a): def test_pardo_properly_type_checks_using_type_hint_decorators(self): @with_input_types(a=str) - @with_output_types(typing.List[str]) + @with_output_types(list[str]) def to_all_upper_case(a): return [a.upper()] @@ -1733,13 +1733,13 @@ def test_pardo_like_inheriting_output_types_from_annotation(self): def fn1(x: str) -> int: return 1 - def fn1_flat(x: str) -> typing.List[int]: + def fn1_flat(x: str) -> list[int]: return [1] def fn2(x: int, y: str) -> str: return y - def fn2_flat(x: int, y: str) -> typing.List[str]: + def fn2_flat(x: int, y: str) -> list[str]: return [y] # We only need the args section of the hints. @@ -1756,23 +1756,21 @@ def add(a: typing.Iterable[int]) -> int: return sum(a) self.assertCompatible( - typing.Tuple[typing.TypeVar('K'), int], - output_hints(beam.CombinePerKey(add))) + tuple[typing.TypeVar('K'), int], output_hints(beam.CombinePerKey(add))) def test_group_by_key_only_output_type_deduction(self): d = ( self.p | 'Str' >> beam.Create(['t', 'e', 's', 't']).with_output_types(str) | ( - 'Pair' >> beam.Map(lambda x: (x, ord(x))).with_output_types( - typing.Tuple[str, str])) + 'Pair' >> beam.Map(lambda x: + (x, ord(x))).with_output_types(tuple[str, str])) | beam.GroupByKey()) # Output type should correctly be deduced. # GBK-only should deduce that Tuple[A, B] is turned into # Tuple[A, Iterable[B]]. - self.assertCompatible( - typing.Tuple[str, typing.Iterable[str]], d.element_type) + self.assertCompatible(tuple[str, typing.Iterable[str]], d.element_type) def test_group_by_key_output_type_deduction(self): d = ( @@ -1780,13 +1778,12 @@ def test_group_by_key_output_type_deduction(self): | 'Str' >> beam.Create(range(20)).with_output_types(int) | ( 'PairNegative' >> beam.Map(lambda x: (x % 5, -x)).with_output_types( - typing.Tuple[int, int])) + tuple[int, int])) | beam.GroupByKey()) # Output type should correctly be deduced. # GBK should deduce that Tuple[A, B] is turned into Tuple[A, Iterable[B]]. - self.assertCompatible( - typing.Tuple[int, typing.Iterable[int]], d.element_type) + self.assertCompatible(tuple[int, typing.Iterable[int]], d.element_type) def test_group_by_key_only_does_not_type_check(self): # GBK will be passed raw int's here instead of some form of Tuple[A, B]. 
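Beam normalizes native Python hints through its typehints machinery, so the decorators in these tests accept list[str] or tuple[int, str] exactly as they accepted the typing aliases. A minimal runnable sketch in the style of the decorator tests above (assumes the default local runner):

import apache_beam as beam
from apache_beam.typehints import with_input_types
from apache_beam.typehints import with_output_types

@with_input_types(a=int)
@with_output_types(list[str])
def int_to_str(a):
  return [str(a)]

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create([1, 2, 3])
      | beam.FlatMap(int_to_str)  # element type is deduced as str
      | beam.Map(print))
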
@@ -1886,7 +1883,7 @@ def test_run_time_type_checking_enabled_types_satisfied(self): self.p._options.view_as(TypeOptions).pipeline_type_check = False self.p._options.view_as(TypeOptions).runtime_type_check = True - @with_output_types(typing.Tuple[int, str]) + @with_output_types(tuple[int, str]) @with_input_types(x=str) def group_with_upper_ord(x): return (ord(x.upper()) % 5, x) @@ -1910,7 +1907,7 @@ def test_pipeline_checking_satisfied_but_run_time_types_violate(self): self.p._options.view_as(TypeOptions).pipeline_type_check = False self.p._options.view_as(TypeOptions).runtime_type_check = True - @with_output_types(typing.Tuple[bool, int]) + @with_output_types(tuple[bool, int]) @with_input_types(a=int) def is_even_as_key(a): # Simulate a programming error, should be: return (a % 2 == 0, a) @@ -1934,7 +1931,7 @@ def is_even_as_key(a): def test_pipeline_checking_satisfied_run_time_checking_satisfied(self): self.p._options.view_as(TypeOptions).pipeline_type_check = False - @with_output_types(typing.Tuple[bool, int]) + @with_output_types(tuple[bool, int]) @with_input_types(a=int) def is_even_as_key(a): # The programming error in the above test-case has now been fixed. @@ -1982,7 +1979,7 @@ def test_pipeline_runtime_checking_violation_composite_type_input(self): | ( 'Add' >> beam.FlatMap(lambda x_y: [x_y[0] + x_y[1]]).with_input_types( - typing.Tuple[int, int]).with_output_types(int))) + tuple[int, int]).with_output_types(int))) self.p.run() def test_pipeline_runtime_checking_violation_simple_type_output(self): @@ -2041,8 +2038,7 @@ def test_pipeline_runtime_checking_violation_composite_type_output(self): | ( 'Swap' >> beam.FlatMap(lambda x_y1: [x_y1[0] + x_y1[1]]).with_input_types( - typing.Tuple[int, float]).with_output_types( - typing.Tuple[float, int]))) + tuple[int, float]).with_output_types(tuple[float, int]))) self.p.run() def test_pipeline_runtime_checking_violation_with_side_inputs_decorator(self): @@ -2126,7 +2122,7 @@ def test_combine_pipeline_type_propagation_using_decorators(self): def sum_ints(ints): return sum(ints) - @with_output_types(typing.List[int]) + @with_output_types(list[int]) @with_input_types(n=int) def range_from_zero(n): return list(range(n + 1)) @@ -2312,10 +2308,10 @@ def test_mean_per_key_pipeline_checking_satisfied(self): | beam.Create(range(5)).with_output_types(int) | ( 'EvenGroup' >> beam.Map(lambda x: (not x % 2, x)).with_output_types( - typing.Tuple[bool, int])) + tuple[bool, int])) | 'EvenMean' >> combine.Mean.PerKey()) - self.assertCompatible(typing.Tuple[bool, float], d.element_type) + self.assertCompatible(tuple[bool, float], d.element_type) assert_that(d, equal_to([(False, 2.0), (True, 2.0)])) self.p.run() @@ -2327,7 +2323,7 @@ def test_mean_per_key_pipeline_checking_violated(self): | ( 'UpperPair' >> beam.Map(lambda x: (x.upper(), x)).with_output_types( - typing.Tuple[str, str])) + tuple[str, str])) | 'EvenMean' >> combine.Mean.PerKey()) self.p.run() err_msg = e.exception.args[0] @@ -2346,10 +2342,10 @@ def test_mean_per_key_runtime_checking_satisfied(self): | ( 'OddGroup' >> beam.Map(lambda x: (bool(x % 2), x)).with_output_types( - typing.Tuple[bool, int])) + tuple[bool, int])) | 'OddMean' >> combine.Mean.PerKey()) - self.assertCompatible(typing.Tuple[bool, float], d.element_type) + self.assertCompatible(tuple[bool, float], d.element_type) assert_that(d, equal_to([(False, 2.0), (True, 2.0)])) self.p.run() @@ -2366,7 +2362,7 @@ def test_mean_per_key_runtime_checking_violated(self): | ( 'OddGroup' >> beam.Map(lambda x: (x, str(bool(x % 
2)))).with_output_types( - typing.Tuple[int, str])) + tuple[int, str])) | 'OddMean' >> combine.Mean.PerKey()) self.p.run() @@ -2397,10 +2393,10 @@ def test_count_perkey_pipeline_type_checking_satisfied(self): self.p | beam.Create(range(5)).with_output_types(int) | 'EvenGroup' >> beam.Map(lambda x: (not x % 2, x)).with_output_types( - typing.Tuple[bool, int]) + tuple[bool, int]) | 'CountInt' >> combine.Count.PerKey()) - self.assertCompatible(typing.Tuple[bool, int], d.element_type) + self.assertCompatible(tuple[bool, int], d.element_type) assert_that(d, equal_to([(False, 2), (True, 3)])) self.p.run() @@ -2420,11 +2416,11 @@ def test_count_perkey_runtime_type_checking_satisfied(self): d = ( self.p | beam.Create(['t', 'e', 's', 't']).with_output_types(str) - | 'DupKey' >> beam.Map(lambda x: (x, x)).with_output_types( - typing.Tuple[str, str]) + | 'DupKey' >> beam.Map(lambda x: + (x, x)).with_output_types(tuple[str, str]) | 'CountDups' >> combine.Count.PerKey()) - self.assertCompatible(typing.Tuple[str, int], d.element_type) + self.assertCompatible(tuple[str, int], d.element_type) assert_that(d, equal_to([('e', 1), ('s', 1), ('t', 2)])) self.p.run() @@ -2434,7 +2430,7 @@ def test_count_perelement_pipeline_type_checking_satisfied(self): | beam.Create([1, 1, 2, 3]).with_output_types(int) | 'CountElems' >> combine.Count.PerElement()) - self.assertCompatible(typing.Tuple[int, int], d.element_type) + self.assertCompatible(tuple[int, int], d.element_type) assert_that(d, equal_to([(1, 2), (2, 1), (3, 1)])) self.p.run() @@ -2461,7 +2457,7 @@ def test_count_perelement_runtime_type_checking_satisfied(self): | beam.Create([True, True, False, True, True]).with_output_types(bool) | 'CountElems' >> combine.Count.PerElement()) - self.assertCompatible(typing.Tuple[bool, int], d.element_type) + self.assertCompatible(tuple[bool, int], d.element_type) assert_that(d, equal_to([(False, 1), (True, 4)])) self.p.run() @@ -2506,11 +2502,10 @@ def test_per_key_pipeline_checking_satisfied(self): | beam.Create(range(100)).with_output_types(int) | ( 'GroupMod 3' >> beam.Map(lambda x: (x % 3, x)).with_output_types( - typing.Tuple[int, int])) + tuple[int, int])) | 'TopMod' >> combine.Top.PerKey(1)) - self.assertCompatible( - typing.Tuple[int, typing.Iterable[int]], d.element_type) + self.assertCompatible(tuple[int, typing.Iterable[int]], d.element_type) assert_that(d, equal_to([(0, [99]), (1, [97]), (2, [98])])) self.p.run() @@ -2522,11 +2517,10 @@ def test_per_key_runtime_checking_satisfied(self): | beam.Create(range(21)) | ( 'GroupMod 3' >> beam.Map(lambda x: (x % 3, x)).with_output_types( - typing.Tuple[int, int])) + tuple[int, int])) | 'TopMod' >> combine.Top.PerKey(1)) - self.assertCompatible( - typing.Tuple[int, typing.Iterable[int]], d.element_type) + self.assertCompatible(tuple[int, typing.Iterable[int]], d.element_type) assert_that(d, equal_to([(0, [18]), (1, [19]), (2, [20])])) self.p.run() @@ -2571,11 +2565,10 @@ def test_sample_per_key_pipeline_satisfied(self): self.p | ( beam.Create([(1, 2), (1, 2), (2, 3), - (2, 3)]).with_output_types(typing.Tuple[int, int])) + (2, 3)]).with_output_types(tuple[int, int])) | 'Sample' >> combine.Sample.FixedSizePerKey(2)) - self.assertCompatible( - typing.Tuple[int, typing.Iterable[int]], d.element_type) + self.assertCompatible(tuple[int, typing.Iterable[int]], d.element_type) def matcher(expected_len): def match(actual): @@ -2594,11 +2587,10 @@ def test_sample_per_key_runtime_satisfied(self): self.p | ( beam.Create([(1, 2), (1, 2), (2, 3), - (2, 
3)]).with_output_types(typing.Tuple[int, int])) + (2, 3)]).with_output_types(tuple[int, int])) | 'Sample' >> combine.Sample.FixedSizePerKey(1)) - self.assertCompatible( - typing.Tuple[int, typing.Iterable[int]], d.element_type) + self.assertCompatible(tuple[int, typing.Iterable[int]], d.element_type) def matcher(expected_len): def match(actual): @@ -2616,7 +2608,7 @@ def test_to_list_pipeline_check_satisfied(self): | beam.Create((1, 2, 3, 4)).with_output_types(int) | combine.ToList()) - self.assertCompatible(typing.List[int], d.element_type) + self.assertCompatible(list[int], d.element_type) def matcher(expected): def match(actual): @@ -2635,7 +2627,7 @@ def test_to_list_runtime_check_satisfied(self): | beam.Create(list('test')).with_output_types(str) | combine.ToList()) - self.assertCompatible(typing.List[str], d.element_type) + self.assertCompatible(list[str], d.element_type) def matcher(expected): def match(actual): @@ -2661,11 +2653,10 @@ def test_to_dict_pipeline_check_violated(self): def test_to_dict_pipeline_check_satisfied(self): d = ( self.p - | beam.Create([(1, 2), - (3, 4)]).with_output_types(typing.Tuple[int, int]) + | beam.Create([(1, 2), (3, 4)]).with_output_types(tuple[int, int]) | combine.ToDict()) - self.assertCompatible(typing.Dict[int, int], d.element_type) + self.assertCompatible(dict[int, int], d.element_type) assert_that(d, equal_to([{1: 2, 3: 4}])) self.p.run() @@ -2674,12 +2665,11 @@ def test_to_dict_runtime_check_satisfied(self): d = ( self.p - | ( - beam.Create([('1', 2), - ('3', 4)]).with_output_types(typing.Tuple[str, int])) + | + (beam.Create([('1', 2), ('3', 4)]).with_output_types(tuple[str, int])) | combine.ToDict()) - self.assertCompatible(typing.Dict[str, int], d.element_type) + self.assertCompatible(dict[str, int], d.element_type) assert_that(d, equal_to([{'1': 2, '3': 4}])) self.p.run() diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py b/sdks/python/apache_beam/transforms/sideinputs_test.py index 9b79b9d1fa8d..e2af0d2c12f0 100644 --- a/sdks/python/apache_beam/transforms/sideinputs_test.py +++ b/sdks/python/apache_beam/transforms/sideinputs_test.py @@ -24,9 +24,7 @@ import logging import unittest from typing import Any -from typing import Dict from typing import Iterable -from typing import Tuple from typing import Union import pytest @@ -445,7 +443,7 @@ def test_side_input_with_sdf(self): class GetSyntheticSDFOptions(beam.DoFn): """A DoFn that emits elements for generating SDF.""" - def process(self, element: Any) -> Iterable[Dict[str, Union[int, str]]]: + def process(self, element: Any) -> Iterable[dict[str, Union[int, str]]]: yield { 'num_records': num_records // initial_elements, 'key_size': key_size, @@ -464,8 +462,8 @@ class SideInputTrackingDoFn(beam.DoFn): """ def process( self, element: Any, side_input: Iterable[tuple[bytes, bytes]]) -> Iterable[tuple[int, str]]: # Sort for consistent hashing.
sorted_side_input = sorted(side_input) diff --git a/sdks/python/apache_beam/transforms/stats.py b/sdks/python/apache_beam/transforms/stats.py index fb38a883dd39..7cefe58dd133 100644 --- a/sdks/python/apache_beam/transforms/stats.py +++ b/sdks/python/apache_beam/transforms/stats.py @@ -28,6 +28,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import hashlib import heapq import itertools diff --git a/sdks/python/apache_beam/transforms/util.py b/sdks/python/apache_beam/transforms/util.py index 4e16b805184a..2ea9df9399cb 100644 --- a/sdks/python/apache_beam/transforms/util.py +++ b/sdks/python/apache_beam/transforms/util.py @@ -35,9 +35,7 @@ from collections.abc import Iterable from typing import TYPE_CHECKING from typing import Any -from typing import List from typing import Optional -from typing import Tuple from typing import TypeVar from typing import Union @@ -586,7 +584,7 @@ def setup(self): self.fernet = Fernet(self._hmac_key) def process(self, - element: Any) -> Iterable[Tuple[bytes, Tuple[bytes, bytes]]]: + element: Any) -> Iterable[tuple[bytes, tuple[bytes, bytes]]]: """Encrypts the key and value of an element. Args: @@ -622,7 +620,7 @@ def setup(self): hmac_key = self.hmac_key_secret.get_secret_bytes() self.fernet = Fernet(hmac_key) - def decode_value(self, encoded_element: Tuple[bytes, bytes]) -> Any: + def decode_value(self, encoded_element: tuple[bytes, bytes]) -> Any: encrypted_value = encoded_element[1] encoded_value = self.fernet.decrypt(encrypted_value) real_val = self.value_coder.decode(encoded_value) @@ -631,7 +629,7 @@ def decode_value(self, encoded_element: Tuple[bytes, bytes]) -> Any: def filter_elements_by_key( self, encrypted_key: bytes, - encoded_elements: Iterable[Tuple[bytes, bytes]]) -> Iterable[Any]: + encoded_elements: Iterable[tuple[bytes, bytes]]) -> Iterable[Any]: for e in encoded_elements: if encrypted_key == self.fernet.decrypt(e[0]): yield self.decode_value(e) @@ -640,8 +638,8 @@ def filter_elements_by_key( # here. This does mean that the whole list will be materialized every time, # but passing an Iterable containing an Iterable breaks when pickling happens def process( - self, element: Tuple[bytes, Iterable[Tuple[bytes, bytes]]] - ) -> Iterable[Tuple[Any, List[Any]]]: + self, element: tuple[bytes, Iterable[tuple[bytes, bytes]]] + ) -> Iterable[tuple[Any, list[Any]]]: """Decrypts the key and values of an element. Args: @@ -669,8 +667,8 @@ def process( list(self.filter_elements_by_key(encoded_key, encoded_elements))) -@typehints.with_input_types(Tuple[K, V]) -@typehints.with_output_types(Tuple[K, Iterable[V]]) +@typehints.with_input_types(tuple[K, V]) +@typehints.with_output_types(tuple[K, Iterable[V]]) class GroupByEncryptedKey(PTransform): """A PTransform that provides a secure alternative to GroupByKey. 
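The decorator pair on GroupByEncryptedKey above mirrors the one on GroupByKey earlier in this diff: class-level typehints parameterized by TypeVars, now spelled with builtin tuple. An illustrative toy transform in the same style (not part of Beam):

import typing

import apache_beam as beam
from apache_beam import typehints

K = typing.TypeVar('K')
V = typing.TypeVar('V')

@typehints.with_input_types(tuple[K, V])
@typehints.with_output_types(tuple[K, typing.Iterable[V]])
class GroupAndSort(beam.PTransform):
  """Toy wrapper: groups by key, then sorts each group's values."""
  def expand(self, pcoll):
    return (
        pcoll
        | beam.GroupByKey()
        | beam.MapTuple(lambda k, vs: (k, sorted(vs))))
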
@@ -727,7 +725,7 @@ def expand(self, pcoll): gbk = beam.GroupByKey() gbk._inside_gbek = True - output_type = Tuple[key_type, Iterable[value_type]] + output_type = tuple[key_type, Iterable[value_type]] return ( pcoll diff --git a/sdks/python/apache_beam/transforms/validate_runner_xlang_test.py b/sdks/python/apache_beam/transforms/validate_runner_xlang_test.py index 72371b38fdf6..e50c8b064f7e 100644 --- a/sdks/python/apache_beam/transforms/validate_runner_xlang_test.py +++ b/sdks/python/apache_beam/transforms/validate_runner_xlang_test.py @@ -53,7 +53,6 @@ import logging import os import sys -import typing import unittest from datetime import datetime @@ -145,7 +144,7 @@ def run_group_by_key(self, pipeline): p | beam.Create([(0, "1"), (0, "2"), (1, "3")], reshuffle=False).with_output_types( - typing.Tuple[int, str]) + tuple[int, str]) | beam.ExternalTransform(TEST_GBK_URN, None, self.expansion_service) | beam.Map(lambda x: "{}:{}".format(x[0], ','.join(sorted(x[1]))))) assert_that(res, equal_to(['0:1,2', '1:3'])) @@ -165,7 +164,7 @@ def run_group_by_key_no_assert(self, pipeline): p | beam.Create([(0, "1"), (0, "2"), (1, "3")], reshuffle=False).with_output_types( - typing.Tuple[int, str]) + tuple[int, str]) | beam.ExternalTransform(TEST_GBK_URN, None, self.expansion_service)) def run_cogroup_by_key(self, pipeline): @@ -180,11 +179,11 @@ def run_cogroup_by_key(self, pipeline): """ with pipeline as p: col1 = p | 'create_col1' >> beam.Create( - [(0, "1"), (0, "2"), (1, "3")], reshuffle=False).with_output_types( - typing.Tuple[int, str]) + [(0, "1"), (0, "2"), + (1, "3")], reshuffle=False).with_output_types(tuple[int, str]) col2 = p | 'create_col2' >> beam.Create( - [(0, "4"), (1, "5"), (1, "6")], reshuffle=False).with_output_types( - typing.Tuple[int, str]) + [(0, "4"), (1, "5"), + (1, "6")], reshuffle=False).with_output_types(tuple[int, str]) res = ( dict(col1=col1, col2=col2) | beam.ExternalTransform(TEST_CGBK_URN, None, self.expansion_service) @@ -223,7 +222,7 @@ def run_combine_per_key(self, pipeline): res = ( p | beam.Create([('a', 1), ('a', 2), - ('b', 3)]).with_output_types(typing.Tuple[str, int]) + ('b', 3)]).with_output_types(tuple[str, int]) | beam.ExternalTransform( TEST_COMPK_URN, None, self.expansion_service)) assert_that(res, equal_to([('a', 3), ('b', 3)])) diff --git a/sdks/python/apache_beam/typehints/arrow_type_compatibility.py b/sdks/python/apache_beam/typehints/arrow_type_compatibility.py index ac0985f4b9c6..012c55388c3a 100644 --- a/sdks/python/apache_beam/typehints/arrow_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/arrow_type_compatibility.py @@ -20,6 +20,7 @@ For internal use only, no backward compatibility guarantees. 
""" +# ruff: noqa: UP006 from functools import partial from typing import Dict from typing import List diff --git a/sdks/python/apache_beam/typehints/batch_test.py b/sdks/python/apache_beam/typehints/batch_test.py index 3fbad76fce06..c62822edf406 100644 --- a/sdks/python/apache_beam/typehints/batch_test.py +++ b/sdks/python/apache_beam/typehints/batch_test.py @@ -17,6 +17,7 @@ """Unit tests for batched type converters.""" +# ruff: noqa: UP006 import contextlib import random import typing diff --git a/sdks/python/apache_beam/typehints/decorators.py b/sdks/python/apache_beam/typehints/decorators.py index e393113c002e..bc44ec745d5a 100644 --- a/sdks/python/apache_beam/typehints/decorators.py +++ b/sdks/python/apache_beam/typehints/decorators.py @@ -79,6 +79,7 @@ def foo((a, b)): # pytype: skip-file +# ruff: noqa: UP006 import inspect import itertools import logging diff --git a/sdks/python/apache_beam/typehints/decorators_test.py b/sdks/python/apache_beam/typehints/decorators_test.py index 95745f4e3d88..73efea9e9abf 100644 --- a/sdks/python/apache_beam/typehints/decorators_test.py +++ b/sdks/python/apache_beam/typehints/decorators_test.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import functools import typing import unittest diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility.py b/sdks/python/apache_beam/typehints/native_type_compatibility.py index f1f58f8c2bea..7f28d57b2f81 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import collections import collections.abc import dataclasses diff --git a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py index e9e101bb13da..33d6051afc7a 100644 --- a/sdks/python/apache_beam/typehints/native_type_compatibility_test.py +++ b/sdks/python/apache_beam/typehints/native_type_compatibility_test.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import collections.abc import dataclasses import enum diff --git a/sdks/python/apache_beam/typehints/pandas_type_compatibility.py b/sdks/python/apache_beam/typehints/pandas_type_compatibility.py index 9fbfd5c3ddfc..45ae27baffe7 100644 --- a/sdks/python/apache_beam/typehints/pandas_type_compatibility.py +++ b/sdks/python/apache_beam/typehints/pandas_type_compatibility.py @@ -51,6 +51,7 @@ compatibility guarantees, except for the type mapping itself. 
""" +# ruff: noqa: UP006 from typing import Any from typing import List from typing import Optional diff --git a/sdks/python/apache_beam/typehints/row_type.py b/sdks/python/apache_beam/typehints/row_type.py index 0697581cb435..5e0faf91f55b 100644 --- a/sdks/python/apache_beam/typehints/row_type.py +++ b/sdks/python/apache_beam/typehints/row_type.py @@ -17,6 +17,7 @@ # pytype: skip-file +# ruff: noqa: UP006 from __future__ import annotations import dataclasses diff --git a/sdks/python/apache_beam/typehints/schemas.py b/sdks/python/apache_beam/typehints/schemas.py index eb2990d4222e..2839edf4f91f 100644 --- a/sdks/python/apache_beam/typehints/schemas.py +++ b/sdks/python/apache_beam/typehints/schemas.py @@ -67,6 +67,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import datetime import decimal import logging diff --git a/sdks/python/apache_beam/typehints/schemas_test.py b/sdks/python/apache_beam/typehints/schemas_test.py index 8032e4701c25..b0687f50f797 100644 --- a/sdks/python/apache_beam/typehints/schemas_test.py +++ b/sdks/python/apache_beam/typehints/schemas_test.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import dataclasses import datetime import itertools diff --git a/sdks/python/apache_beam/typehints/typecheck_test.py b/sdks/python/apache_beam/typehints/typecheck_test.py index c2eaa0f6f9f7..81046cd3cc0f 100644 --- a/sdks/python/apache_beam/typehints/typecheck_test.py +++ b/sdks/python/apache_beam/typehints/typecheck_test.py @@ -23,6 +23,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import os import tempfile import unittest diff --git a/sdks/python/apache_beam/typehints/typed_pipeline_test.py b/sdks/python/apache_beam/typehints/typed_pipeline_test.py index 820f78fa9ef5..c97a1977e9c3 100644 --- a/sdks/python/apache_beam/typehints/typed_pipeline_test.py +++ b/sdks/python/apache_beam/typehints/typed_pipeline_test.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import typing import unittest from typing import Tuple diff --git a/sdks/python/apache_beam/typehints/typehints.py b/sdks/python/apache_beam/typehints/typehints.py index f429935c3a0e..eec9ea86bd4c 100644 --- a/sdks/python/apache_beam/typehints/typehints.py +++ b/sdks/python/apache_beam/typehints/typehints.py @@ -65,6 +65,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import copy import logging import types diff --git a/sdks/python/apache_beam/typehints/typehints_test.py b/sdks/python/apache_beam/typehints/typehints_test.py index 992c129fd8a5..a335ab05f1b7 100644 --- a/sdks/python/apache_beam/typehints/typehints_test.py +++ b/sdks/python/apache_beam/typehints/typehints_test.py @@ -19,6 +19,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import collections.abc import functools import re diff --git a/sdks/python/apache_beam/utils/counters.py b/sdks/python/apache_beam/utils/counters.py index 57d73fa283eb..3281c286b8fb 100644 --- a/sdks/python/apache_beam/utils/counters.py +++ b/sdks/python/apache_beam/utils/counters.py @@ -25,6 +25,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import threading from collections import namedtuple from typing import TYPE_CHECKING diff --git a/sdks/python/apache_beam/utils/multi_process_shared.py b/sdks/python/apache_beam/utils/multi_process_shared.py index de4b94bc5da3..b05fdd305a60 100644 --- a/sdks/python/apache_beam/utils/multi_process_shared.py +++ b/sdks/python/apache_beam/utils/multi_process_shared.py @@ -32,7 +32,6 @@ import traceback from typing import Any from typing import Callable -from typing import Dict from typing import Generic from typing import Optional from 
typing import TypeVar @@ -156,7 +155,7 @@ def unsafe_hard_delete(self): class _SingletonManager: - entries: Dict[Any, Any] = {} + entries: dict[Any, Any] = {} def __init__(self): self._hard_delete_callback = None diff --git a/sdks/python/apache_beam/utils/proto_utils.py b/sdks/python/apache_beam/utils/proto_utils.py index 60c0af2ebac0..89a6f8a4648c 100644 --- a/sdks/python/apache_beam/utils/proto_utils.py +++ b/sdks/python/apache_beam/utils/proto_utils.py @@ -19,7 +19,6 @@ # pytype: skip-file -from typing import Type from typing import TypeVar from typing import Union from typing import overload @@ -64,7 +63,7 @@ def pack_Any(msg): @overload -def unpack_Any(any_msg: any_pb2.Any, msg_class: Type[MessageT]) -> MessageT: +def unpack_Any(any_msg: any_pb2.Any, msg_class: type[MessageT]) -> MessageT: pass @@ -86,13 +85,13 @@ def unpack_Any(any_msg, msg_class): @overload -def parse_Bytes(serialized_bytes: bytes, msg_class: Type[MessageT]) -> MessageT: +def parse_Bytes(serialized_bytes: bytes, msg_class: type[MessageT]) -> MessageT: pass @overload def parse_Bytes( - serialized_bytes: bytes, msg_class: Union[Type[bytes], None]) -> bytes: + serialized_bytes: bytes, msg_class: Union[type[bytes], None]) -> bytes: pass @@ -116,7 +115,7 @@ def pack_Struct(**kwargs) -> struct_pb2.Struct: return msg -def from_micros(cls: Type[TimeMessageT], micros: int) -> TimeMessageT: +def from_micros(cls: type[TimeMessageT], micros: int) -> TimeMessageT: result = cls() if isinstance(result, duration_pb2.Duration): result.FromMicroseconds(micros) diff --git a/sdks/python/apache_beam/utils/subprocess_server.py b/sdks/python/apache_beam/utils/subprocess_server.py index ff1a0d9c46aa..988bd680b923 100644 --- a/sdks/python/apache_beam/utils/subprocess_server.py +++ b/sdks/python/apache_beam/utils/subprocess_server.py @@ -32,7 +32,6 @@ import time import zipfile from typing import Any -from typing import Set from urllib.error import URLError from urllib.request import Request from urllib.request import urlopen @@ -49,7 +48,7 @@ @dataclasses.dataclass class _SharedCacheEntry: obj: Any - owners: Set[str] + owners: set[str] class _SharedCache: diff --git a/sdks/python/apache_beam/utils/urns.py b/sdks/python/apache_beam/utils/urns.py index a8074137d178..8732df16db12 100644 --- a/sdks/python/apache_beam/utils/urns.py +++ b/sdks/python/apache_beam/utils/urns.py @@ -26,10 +26,7 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable -from typing import Dict from typing import Optional -from typing import Tuple -from typing import Type from typing import TypeVar from typing import Union from typing import overload @@ -65,7 +62,7 @@ class RunnerApiFn(object): # classes + abc metaclass # __metaclass__ = abc.ABCMeta - _known_urns: Dict[str, Tuple[Optional[type], ConstructorFn]] = {} + _known_urns: dict[str, tuple[Optional[type], ConstructorFn]] = {} # @abc.abstractmethod is disabled here to avoid an error with mypy. mypy # performs abc.abtractmethod/property checks even if a class does @@ -75,7 +72,7 @@ class RunnerApiFn(object): # concrete implementation. # @abc.abstractmethod def to_runner_api_parameter( - self, unused_context: 'PipelineContext') -> Tuple[str, Any]: + self, unused_context: 'PipelineContext') -> tuple[str, Any]: """Returns the urn and payload for this Fn. The returned urn(s) should be registered with `register_urn`. 
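The hunks above and below show the mechanical core of this patch: typing.Type[T] becomes the builtin type[T], and Dict/Tuple become dict/tuple, which PEP 585 permits at runtime on the py310 target that ruff.toml declares. A minimal standalone sketch of the before/after shape — the names below are simplified stand-ins for the classmethods in this file, not Beam's actual API:

from typing import Optional, TypeVar

T = TypeVar('T')

# Before (typing aliases, which UP006 flags):
#   _known_urns: Dict[str, Tuple[Optional[type], ConstructorFn]] = {}
#   def register_urn(urn: str, parameter_type: Type[T]) -> None: ...

# After (PEP 585 builtin generics):
_known_urns: dict[str, tuple[Optional[type], object]] = {}

def register_urn(urn: str, parameter_type: type[T]) -> None:
    # Record the payload type so a later lookup can deserialize the
    # FunctionSpec with the right proto class (simplified stand-in).
    _known_urns[urn] = (parameter_type, None)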
@@ -87,7 +84,7 @@ def to_runner_api_parameter( def register_urn( cls, urn: str, - parameter_type: Type[T], + parameter_type: type[T], ) -> Callable[[Callable[[T, 'PipelineContext'], Any]], Callable[[T, 'PipelineContext'], Any]]: pass @@ -107,7 +104,7 @@ def register_urn( def register_urn( cls, urn: str, - parameter_type: Type[T], + parameter_type: type[T], fn: Callable[[T, 'PipelineContext'], Any]) -> None: pass @@ -170,7 +167,7 @@ def to_runner_api( @classmethod def from_runner_api( - cls: Type[RunnerApiFnT], + cls: type[RunnerApiFnT], fn_proto: beam_runner_api_pb2.FunctionSpec, context: 'PipelineContext') -> RunnerApiFnT: """Converts from an FunctionSpec to a Fn object. diff --git a/sdks/python/apache_beam/utils/windowed_value.py b/sdks/python/apache_beam/utils/windowed_value.py index 305e66b85940..6a0987611f5c 100644 --- a/sdks/python/apache_beam/utils/windowed_value.py +++ b/sdks/python/apache_beam/utils/windowed_value.py @@ -24,6 +24,7 @@ # pytype: skip-file +# ruff: noqa: UP006 import collections from typing import TYPE_CHECKING from typing import Any diff --git a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py index ef900090c393..fb81d67da293 100644 --- a/sdks/python/apache_beam/yaml/examples/testing/examples_test.py +++ b/sdks/python/apache_beam/yaml/examples/testing/examples_test.py @@ -26,8 +26,6 @@ import unittest from typing import Any from typing import Callable -from typing import Dict -from typing import List from typing import Optional from typing import Union from unittest import mock @@ -58,7 +56,7 @@ def test_enrichment( pcoll, enrichment_handler: str, - handler_config: Dict[str, Any], + handler_config: dict[str, Any], timeout: Optional[float] = 30): """ Mocks the Enrichment transform for testing purposes. @@ -163,7 +161,7 @@ def test_pubsub_read( subscription: Optional[str] = None, format: Optional[str] = None, schema: Optional[Any] = None, - attributes: Optional[List[str]] = None, + attributes: Optional[list[str]] = None, attributes_map: Optional[str] = None, id_attribute: Optional[str] = None, timestamp_attribute: Optional[str] = None): @@ -287,7 +285,7 @@ def _format_predicition_result_ouput(pcoll, inference_tag): INPUT_TRANSFORM_TEST_PROVIDERS = ['TestReadFromKafka', 'TestReadFromPubSub'] -def check_output(expected: List[str]): +def check_output(expected: list[str]): """ Helper function to check the output of a pipeline against expected values. @@ -302,7 +300,7 @@ def check_output(expected: List[str]): A callable that takes a list of PCollections and asserts their combined elements match the expected output. """ - def _check_inner(actual: List[PCollection[str]]): + def _check_inner(actual: list[PCollection[str]]): formatted_actual = actual | beam.Flatten() | beam.Map( lambda row: str(beam.Row(**row._asdict()))) assert_matches_stdout(formatted_actual, expected) @@ -312,7 +310,7 @@ def _check_inner(actual: List[PCollection[str]]): def create_test_method( pipeline_spec_file: str, - custom_preprocessors: List[Callable[..., Union[Dict, List]]]): + custom_preprocessors: list[Callable[..., Union[dict, list]]]): """ Generates a test method for a given YAML pipeline specification file. @@ -426,7 +424,7 @@ class YamlExamplesTestSuite: files and dynamically generate a Python test method. Additionally, it creates a method to complete some preprocessing for mocking IO. 
""" - _test_preprocessor: Dict[str, List[Callable[..., Union[Dict, List]]]] = {} + _test_preprocessor: dict[str, list[Callable[..., Union[dict, list]]]] = {} def __init__(self, name: str, path: str): """ @@ -501,7 +499,7 @@ def create_test_suite(cls, name: str, path: str): name, (unittest.TestCase, ), dict(cls.parse_test_methods(path))) @classmethod - def register_test_preprocessor(cls, test_names: Union[str, List]): + def register_test_preprocessor(cls, test_names: Union[str, list]): """Decorator to register a preprocessor function for specific tests. This decorator is used to associate a preprocessor function with one or @@ -534,7 +532,7 @@ def apply(preprocessor): @YamlExamplesTestSuite.register_test_preprocessor( ['test_wordcount_minimal_yaml']) def _wordcount_minimal_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for the wordcount_minimal.yaml test. @@ -569,7 +567,7 @@ def _wordcount_minimal_test_preprocessor( 'test_wordCountInheritance_yaml' ]) def _wordcount_jinja_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for the wordcount Jinja tests. @@ -599,7 +597,7 @@ def _wordcount_jinja_test_preprocessor( def _wordcount_random_shuffler( - test_spec: dict, all_words: List[str], env: TestEnvironment): + test_spec: dict, all_words: list[str], env: TestEnvironment): """ Helper function to create a randomized input file for wordcount-style tests. @@ -638,7 +636,7 @@ def _wordcount_random_shuffler( @YamlExamplesTestSuite.register_test_preprocessor( ['test_kafka_yaml', 'test_kafka_to_iceberg_yaml']) def _kafka_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): test_spec = replace_recursive( test_spec, @@ -687,7 +685,7 @@ def _kafka_test_preprocessor( 'test_bigquery_write_yaml' ]) def _io_write_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve writing to IO. @@ -724,7 +722,7 @@ def _io_write_test_preprocessor( 'test_gcs_text_to_bigquery_yaml' ]) def _file_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ This preprocessor replaces any file IO ReadFrom transform with a Create transform that reads from a predefined in-memory dictionary. This allows @@ -758,7 +756,7 @@ def _file_io_read_test_preprocessor( @YamlExamplesTestSuite.register_test_preprocessor( ['test_iceberg_read_yaml', 'test_iceberg_to_alloydb_yaml']) def _iceberg_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from Iceberg. @@ -804,7 +802,7 @@ def _iceberg_io_read_test_preprocessor( 'test_spanner_to_bigquery_yaml' ]) def _spanner_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from Spanner. 
@@ -855,7 +853,7 @@ def _spanner_io_read_test_preprocessor( @YamlExamplesTestSuite.register_test_preprocessor( ['test_bigtable_enrichment_yaml', 'test_enrich_spanner_with_bigquery_yaml']) def _enrichment_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve the Enrichment transform. @@ -887,7 +885,7 @@ 'test_pubsub_to_iceberg_yaml' ]) def _pubsub_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from Pub/Sub. This preprocessor replaces any ReadFromPubSub transform with a Create @@ -908,7 +906,7 @@ 'test_jdbc_to_bigquery_yaml', ]) def _jdbc_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from generic Jdbc. url syntax: 'jdbc:<driver>://<host>:<port>/<database>' @@ -921,7 +919,7 @@ 'test_sqlserver_to_bigquery_yaml', ]) def __sqlserver_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from SqlServer. url syntax: 'jdbc:sqlserver://<host>:<port>;databaseName=<database>; @@ -935,7 +933,7 @@ 'test_postgres_to_bigquery_yaml', ]) def __postgres_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from Postgres. url syntax: 'jdbc:postgresql://<host>:<port>/shipment?user=<user>& @@ -949,7 +947,7 @@ 'test_oracle_to_bigquery_yaml', ]) def __oracle_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from Oracle. url syntax: 'jdbc:oracle:thin:system/oracle@<host>:{port}/<database>' @@ -962,7 +960,7 @@ 'test_mysql_to_bigquery_yaml', ]) def __mysql_io_read_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve reading from MySql. url syntax: 'jdbc:mysql://<host>:<port>/<database>?user=<user>& @@ -1014,7 +1012,7 @@ def _db_io_read_test_processor( @YamlExamplesTestSuite.register_test_preprocessor( 'test_streaming_sentiment_analysis_yaml') def _streaming_sentiment_analysis_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve the streaming sentiment analysis example. @@ -1072,7 +1070,7 @@ @YamlExamplesTestSuite.register_test_preprocessor( 'test_streaming_taxifare_prediction_yaml') def _streaming_taxifare_prediction_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve the streaming taxi fare prediction example.
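Several docstrings in this stretch describe the same move: swap a real IO read for an in-memory Create so the example runs hermetically. A rough sketch of that substitution for the Pub/Sub case, assuming the spec parses to a dict with a pipeline.transforms list — the element payload is invented for illustration:

def _replace_pubsub_read_with_create(test_spec: dict) -> dict:
    # Walk the parsed YAML pipeline and replace ReadFromPubSub with
    # Create, as _pubsub_io_read_test_preprocessor's docstring describes.
    for transform in test_spec.get('pipeline', {}).get('transforms', []):
        if transform.get('type') == 'ReadFromPubSub':
            transform['type'] = 'Create'
            transform['config'] = {'elements': [{'data': 'hello'}]}
    return test_spec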
@@ -1158,7 +1156,7 @@ def _streaming_taxifare_prediction_test_preprocessor( 'test_anomaly_scoring_yaml' ]) def _batch_log_analysis_test_preprocessor( - test_spec: dict, expected: List[str], env: TestEnvironment): + test_spec: dict, expected: list[str], env: TestEnvironment): """ Preprocessor for tests that involve the batch log analysis example. diff --git a/sdks/python/apache_beam/yaml/yaml_testing.py b/sdks/python/apache_beam/yaml/yaml_testing.py index 10d23dd54868..c7f5f5f4e93c 100644 --- a/sdks/python/apache_beam/yaml/yaml_testing.py +++ b/sdks/python/apache_beam/yaml/yaml_testing.py @@ -21,11 +21,8 @@ import random import unittest import uuid -from typing import Dict -from typing import List from typing import Mapping from typing import Optional -from typing import Tuple from typing import TypeVar from typing import Union @@ -539,11 +536,11 @@ def _first_n(transform_spec, options, n, providers=None): K2 = TypeVar('K2') V = TypeVar('V') -InputsType = Dict[str, Union[str, List[str]]] +InputsType = dict[str, Union[str, list[str]]] def _composite_key_to_nested( - d: Mapping[Tuple[K1, K2], V]) -> Mapping[K1, Mapping[K2, V]]: + d: Mapping[tuple[K1, K2], V]) -> Mapping[K1, Mapping[K2, V]]: nested = collections.defaultdict(dict) for (k1, k2), v in d.items(): nested[k1][k2] = v diff --git a/sdks/python/apache_beam/yaml/yaml_utils.py b/sdks/python/apache_beam/yaml/yaml_utils.py index 6e8a97c8b4f8..7a7d985f8327 100644 --- a/sdks/python/apache_beam/yaml/yaml_utils.py +++ b/sdks/python/apache_beam/yaml/yaml_utils.py @@ -22,7 +22,6 @@ from collections.abc import Iterable from collections.abc import Mapping from typing import Any -from typing import Tuple import yaml from yaml import SafeLoader @@ -150,7 +149,7 @@ def wrapper(self, node): # This (recursively) finds the portion of the original string that must # be replaced with new content. - def diff(a: Any, b: Any) -> Iterable[Tuple[int, int, str]]: + def diff(a: Any, b: Any) -> Iterable[tuple[int, int, str]]: if a == b: return elif (isinstance(a, dict) and isinstance(b, dict) and diff --git a/sdks/python/gen_managed_doc.py b/sdks/python/gen_managed_doc.py index 75301d6a7bb5..d492caba23bc 100644 --- a/sdks/python/gen_managed_doc.py +++ b/sdks/python/gen_managed_doc.py @@ -22,7 +22,6 @@ import argparse import os import re -from typing import Dict import yaml from gen_protos import PROJECT_ROOT @@ -125,7 +124,7 @@ def generate_managed_doc(output_location): for gradle_target in expansion_service_jar_targets: provider = ExternalTransformProvider(BeamJarExpansionService(gradle_target)) - discovered: Dict[str, ExternalTransform] = provider.get_all() + discovered: dict[str, ExternalTransform] = provider.get_all() for identifier, transform in discovered.items(): if identifier in read_names_and_identifiers.values(): diff --git a/sdks/python/gen_xlang_wrappers.py b/sdks/python/gen_xlang_wrappers.py index 3176b74e836a..62d30042fc04 100644 --- a/sdks/python/gen_xlang_wrappers.py +++ b/sdks/python/gen_xlang_wrappers.py @@ -29,8 +29,6 @@ import subprocess import typing from typing import Any -from typing import Dict -from typing import List from typing import Union import yaml @@ -117,7 +115,7 @@ class name. This can be overriden by manually providing a name. 
from apache_beam.transforms.external_transform_provider import ExternalTransform from apache_beam.transforms.external_transform_provider import ExternalTransformProvider - transform_list: List[Dict[str, Any]] = [] + transform_list: list[dict[str, Any]] = [] with open(input_services) as f: services = yaml.safe_load(f) @@ -128,7 +126,7 @@ class name. This can be overriden by manually providing a name. raise ValueError( f"Expansion service with target '{target}' does not " "specify any default destinations.") - service_destinations: Dict[str, str] = service['destinations'] + service_destinations: dict[str, str] = service['destinations'] for sdk, dest in service_destinations.items(): validate_sdks_destinations(sdk, dest, target) @@ -136,7 +134,7 @@ class name. This can be overriden by manually providing a name. # use dynamic provider to discover and populate wrapper details provider = ExternalTransformProvider(BeamJarExpansionService(target)) - discovered: Dict[str, ExternalTransform] = provider.get_all() + discovered: dict[str, ExternalTransform] = provider.get_all() for identifier in sorted(discovered.keys()): wrapper = discovered[identifier] if identifier in transforms_to_skip: @@ -227,7 +225,11 @@ def pretty_type(tp): # TODO(ahmedabu98): Make this more generic to support other remote SDKs # Potentially use Runner API types if tp.__module__ == 'builtins': - tp = tp.__name__ + if getattr(tp, '__origin__', None) is None: + tp = tp.__name__ + else: + # Remove nested typing module name (like Optional) + tp = str(tp).replace("typing.", "") elif tp.__module__ == 'typing': tp = str(tp).replace("typing.", "") tp = tp.replace("Sequence", "list") @@ -242,7 +244,7 @@ def pretty_type(tp): return (tp, nullable) -def get_wrappers_from_transform_configs(config_file) -> Dict[str, List[str]]: +def get_wrappers_from_transform_configs(config_file) -> dict[str, list[str]]: """ Generates code for external transform wrapper classes (subclasses of :class:`ExternalTransform`). @@ -264,7 +266,7 @@ def get_wrappers_from_transform_configs(config_file) -> Dict[str, List[str]]: # maintain a list of wrappers to write in each file. if modified destinations # are used, we may end up with multiple wrappers in one file. - destinations: Dict[str, List[str]] = {} + destinations: dict[str, list[str]] = {} with open(config_file) as f: transforms = yaml.safe_load(f) @@ -308,7 +310,7 @@ def get_wrappers_from_transform_configs(config_file) -> Dict[str, List[str]]: def write_wrappers_to_destinations( - grouped_wrappers: Dict[str, List[str]], + grouped_wrappers: dict[str, list[str]], output_dir=PY_WRAPPER_OUTPUT_DIR, format_code=True): """ diff --git a/sdks/python/ruff.toml b/sdks/python/ruff.toml index 5a862e013f12..c0f896c21721 100644 --- a/sdks/python/ruff.toml +++ b/sdks/python/ruff.toml @@ -56,7 +56,7 @@ target-version = "py310" src = ["apache_beam"] [lint] -select = ["E9", "PL", "F821", "F822", "F823"] +select = ["E9", "PL", "F821", "F822", "F823", "UP006"] ignore = [ # Ignored Pylint Checks "PLC0415", # import-outside-toplevel
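Two details close out the patch. The pretty_type change is the one behavioral fix here: a parameterized builtin such as list[str] forwards __module__ to its origin and so reports 'builtins', but taking __name__ would collapse it to bare list and drop the parameters; the new branch stringifies it and strips the typing. prefix from nested parameters like Optional. And adding UP006 to the ruff select list enforces builtin generics repo-wide, while the per-file # ruff: noqa: UP006 pragmas opt out the modules that still rely on the typing aliases. A standalone check of the branch logic (not the Beam function itself):

from typing import Optional

def pretty_builtin(tp):
    # Same branch logic as the patched pretty_type, in isolation: bare
    # builtins keep their name; parameterized ones are stringified so
    # the type parameters survive, minus any 'typing.' prefix.
    if getattr(tp, '__origin__', None) is None:
        return tp.__name__
    return str(tp).replace("typing.", "")

assert pretty_builtin(list) == 'list'
assert pretty_builtin(list[str]) == 'list[str]'
assert pretty_builtin(list[Optional[str]]) == 'list[Optional[str]]'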