diff --git a/README.md b/README.md index 55a4b80..c5a5394 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ Result: - [Request Options](#request-options) - [Adapter](#adapter) - [Timeout](#timeout) + - [Provisioned Throughput](#provisioned-throughput) - [Error Handling](#error-handling) - [Rescuing](#rescuing) - [For Short](#for-short) @@ -1374,6 +1375,41 @@ client = Gemini.new( ) ``` +### Provisioned Throughput + +You can configure provisioned throughput for your requests to ensure consistent performance and availability. This feature allows you to specify how your requests should be handled in terms of resource allocation. + +For more detailed information about Provisioned Throughput, see the [official Google Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/provisioned-throughput/use-provisioned-throughput?hl=en). + +#### Configuration Options + +The `provisioned_throughput` option accepts one of three string values, which is sent with every request as the `X-Vertex-AI-LLM-Request-Type` header: + +- `dedicated`: Uses only your Provisioned Throughput capacity; requests that exceed it fail with a 429 error instead of falling back to pay-as-you-go +- `shared`: Bypasses Provisioned Throughput and uses only shared pay-as-you-go capacity +- `spillover`: Uses Provisioned Throughput first and lets excess traffic spill over to shared pay-as-you-go capacity + +Any other value (including non-string values such as a Hash) raises an `InvalidProvisionedThroughputError` when the client is created for the `vertex-ai-api` service. 
+ +#### Example Usage + +```ruby +client = Gemini.new( + credentials: { + service: 'vertex-ai-api', + region: 'your-provisioned-throughput-region' + }, + options: { + model: 'your-provisioned-throughput-model', + provisioned_throughput: 'dedicated' # or 'shared', 'spillover' + } +) +``` + +#### Notes + +- Provisioned Throughput is only available for the `vertex-ai-api` service; with any other service the `provisioned_throughput` option is ignored and is not validated +- Using this configuration without purchasing and activating Provisioned Throughput may result in HTTP 429 (Too Many Requests) errors ### Error Handling @@ -1424,6 +1460,7 @@ MissingProjectIdError UnsupportedServiceError ConflictingCredentialsError BlockWithoutServerSentEventsError +InvalidProvisionedThroughputError RequestError ``` diff --git a/components/errors.rb b/components/errors.rb index f35363c..516b6d9 100644 --- a/components/errors.rb +++ b/components/errors.rb @@ -12,6 +12,7 @@ class MissingProjectIdError < GeminiError; end class UnsupportedServiceError < GeminiError; end class ConflictingCredentialsError < GeminiError; end class BlockWithoutServerSentEventsError < GeminiError; end + class InvalidProvisionedThroughputError < GeminiError; end class RequestError < GeminiError attr_reader :request, :payload diff --git a/controllers/client.rb b/controllers/client.rb index 96c1c3b..52f2817 100644 --- a/controllers/client.rb +++ b/controllers/client.rb @@ -83,6 +83,13 @@ def initialize(config) else {} end + + @provisioned_throughput = if @service == 'vertex-ai-api' + config.dig(:options, :provisioned_throughput) + else + nil + end + validate_provisioned_throughput! if @provisioned_throughput end def avoid_conflicting_credentials!(credentials) @@ -100,6 +107,29 @@ def avoid_conflicting_credentials!(credentials) "You must choose either #{message}." end + def validate_provisioned_throughput! 
+ return unless @provisioned_throughput + + unless @provisioned_throughput.is_a?(String) + raise Errors::InvalidProvisionedThroughputError, + 'provisioned_throughput must be a string with one of: dedicated, shared, spillover' + end + + valid_configs = %w[dedicated shared spillover] + unless valid_configs.include?(@provisioned_throughput) + raise Errors::InvalidProvisionedThroughputError, + "Invalid config '#{@provisioned_throughput}'. Must be one of: #{valid_configs.join(', ')}" + end + end + + def build_provisioned_throughput_headers + return {} unless @provisioned_throughput + + { + 'X-Vertex-AI-LLM-Request-Type' => @provisioned_throughput + } + end + def predict(payload, server_sent_events: nil, &callback) result = request( "#{@model_address}:predict", payload, @@ -186,10 +216,15 @@ def request(path, payload, server_sent_events: nil, request_method: 'POST', &cal end.send(method_to_call) do |request| request.url url request.headers['Content-Type'] = 'application/json' + if @authentication == :service_account || @authentication == :default_credentials request.headers['Authorization'] = "Bearer #{@authorizer.fetch_access_token!['access_token']}" end + + provisioned_headers = build_provisioned_throughput_headers + provisioned_headers.each { |key, value| request.headers[key] = value } + request.body = payload.to_json unless payload.nil? if server_sent_events_enabled diff --git a/spec/controllers/client_spec.rb b/spec/controllers/client_spec.rb index 6c7df9c..c3b3fef 100644 --- a/spec/controllers/client_spec.rb +++ b/spec/controllers/client_spec.rb @@ -45,4 +45,98 @@ "You must choose either 'file_contents', or 'file_path'." 
) end + + describe 'provisioned throughput configuration' do + let(:vertex_ai_config) do + { + credentials: { + service: 'vertex-ai-api', + project_id: 'test-project', + region: 'us-central1', + api_key: 'test-api-key' + }, + options: { + model: 'gemini-1.5-pro' + } + } + end + + let(:generative_language_config) do + { + credentials: { + service: 'generative-language-api', + api_key: 'test-api-key' + }, + options: { + model: 'gemini-1.5-pro' + } + } + end + + context 'with vertex-ai-api service' do + it 'accepts valid provisioned throughput configurations' do + %w[dedicated shared spillover].each do |config| + expect do + described_class.new(vertex_ai_config.merge( + options: vertex_ai_config[:options].merge(provisioned_throughput: config) + )) + end.not_to raise_error + end + end + + it 'rejects invalid provisioned throughput configurations' do + expect do + described_class.new(vertex_ai_config.merge( + options: vertex_ai_config[:options].merge(provisioned_throughput: 'invalid') + )) + end.to raise_error( + Gemini::Errors::InvalidProvisionedThroughputError, + "Invalid config 'invalid'. 
Must be one of: dedicated, shared, spillover" + ) + end + + it 'rejects non-string provisioned throughput configurations' do + expect do + described_class.new(vertex_ai_config.merge( + options: vertex_ai_config[:options].merge(provisioned_throughput: { config: 'dedicated' }) + )) + end.to raise_error( + Gemini::Errors::InvalidProvisionedThroughputError, + 'provisioned_throughput must be a string with one of: dedicated, shared, spillover' + ) + end + + it 'allows nil provisioned throughput configuration' do + expect do + described_class.new(vertex_ai_config) + end.not_to raise_error + end + end + + context 'with generative-language-api service' do + it 'ignores provisioned throughput configuration' do + expect do + described_class.new(generative_language_config.merge( + options: generative_language_config[:options].merge(provisioned_throughput: 'dedicated') + )) + end.not_to raise_error + end + + it 'ignores invalid provisioned throughput configuration' do + expect do + described_class.new(generative_language_config.merge( + options: generative_language_config[:options].merge(provisioned_throughput: 'invalid') + )) + end.not_to raise_error + end + + it 'ignores non-string provisioned throughput configuration' do + expect do + described_class.new(generative_language_config.merge( + options: generative_language_config[:options].merge(provisioned_throughput: { config: 'dedicated' }) + )) + end.not_to raise_error + end + end + end end