Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ Result:
- [Request Options](#request-options)
- [Adapter](#adapter)
- [Timeout](#timeout)
- [Provisioned Throughput](#provisioned-throughput)
- [Error Handling](#error-handling)
- [Rescuing](#rescuing)
- [For Short](#for-short)
Expand Down Expand Up @@ -1374,6 +1375,41 @@ client = Gemini.new(
)
```

### Provisioned Throughput

You can control which capacity pool serves each request by setting the `provisioned_throughput` option. It determines whether requests are served by your purchased Provisioned Throughput quota, by shared pay-as-you-go capacity, or by Provisioned Throughput with fallback to shared capacity.

For more detailed information about Provisioned Throughput, see the [official Google Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/provisioned-throughput/use-provisioned-throughput?hl=en).

#### Configuration Options

The `provisioned_throughput` option accepts one of three values:

- `dedicated`: serve the request only from your purchased Provisioned Throughput quota (requests beyond the quota are rejected)
- `shared`: serve the request only from shared, pay-as-you-go capacity
- `spillover`: prefer Provisioned Throughput and fall back to shared capacity when the quota is exhausted

Invalid values will raise an `InvalidProvisionedThroughputError`.

#### Example Usage

```ruby
client = Gemini.new(
credentials: {
service: 'vertex-ai-api',
region: 'your-provisioned-throughput-region'
},
options: {
model: 'your-provisioned-throughput-model',
provisioned_throughput: 'dedicated' # or 'shared', 'spillover'
}
)
```

#### Notes

- Provisioned Throughput is only available for the `vertex-ai-api` service
- Using this configuration without purchasing and activating Provisioned Throughput may result in 429 errors

### Error Handling

Expand Down Expand Up @@ -1424,6 +1460,7 @@ MissingProjectIdError
UnsupportedServiceError
ConflictingCredentialsError
BlockWithoutServerSentEventsError
InvalidProvisionedThroughputError

RequestError
```
Expand Down
1 change: 1 addition & 0 deletions components/errors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class MissingProjectIdError < GeminiError; end
# Each error class inherits from GeminiError so callers can rescue the
# whole family with a single rescue clause.
class UnsupportedServiceError < GeminiError; end
class ConflictingCredentialsError < GeminiError; end
class BlockWithoutServerSentEventsError < GeminiError; end
class InvalidProvisionedThroughputError < GeminiError; end

class RequestError < GeminiError
attr_reader :request, :payload
Expand Down
35 changes: 35 additions & 0 deletions controllers/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ def initialize(config)
else
{}
end

@provisioned_throughput = if @service == 'vertex-ai-api'
config.dig(:options, :provisioned_throughput)
else
nil
end
validate_provisioned_throughput! if @provisioned_throughput
end

def avoid_conflicting_credentials!(credentials)
Expand All @@ -100,6 +107,29 @@ def avoid_conflicting_credentials!(credentials)
"You must choose either #{message}."
end

# Validates the optional @provisioned_throughput setting.
#
# A nil/absent value is allowed (the feature is simply unused). Otherwise
# the value must be one of the String request types understood by
# Vertex AI ('dedicated', 'shared', 'spillover'); anything else raises
# Errors::InvalidProvisionedThroughputError.
def validate_provisioned_throughput!
  value = @provisioned_throughput
  return unless value

  allowed = %w[dedicated shared spillover]

  unless value.is_a?(String)
    raise Errors::InvalidProvisionedThroughputError,
          "provisioned_throughput must be a string with one of: #{allowed.join(', ')}"
  end

  return if allowed.include?(value)

  raise Errors::InvalidProvisionedThroughputError,
        "Invalid config '#{value}'. Must be one of: #{allowed.join(', ')}"
end

# Returns the extra HTTP headers that select the Vertex AI request
# (throughput) type, or an empty hash when provisioned throughput is
# not configured.
def build_provisioned_throughput_headers
  @provisioned_throughput ? { 'X-Vertex-AI-LLM-Request-Type' => @provisioned_throughput } : {}
end

def predict(payload, server_sent_events: nil, &callback)
result = request(
"#{@model_address}:predict", payload,
Expand Down Expand Up @@ -186,10 +216,15 @@ def request(path, payload, server_sent_events: nil, request_method: 'POST', &cal
end.send(method_to_call) do |request|
request.url url
request.headers['Content-Type'] = 'application/json'

if @authentication == :service_account || @authentication == :default_credentials
request.headers['Authorization'] = "Bearer #{@authorizer.fetch_access_token!['access_token']}"
end


provisioned_headers = build_provisioned_throughput_headers
provisioned_headers.each { |key, value| request.headers[key] = value }

request.body = payload.to_json unless payload.nil?

if server_sent_events_enabled
Expand Down
94 changes: 94 additions & 0 deletions spec/controllers/client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,98 @@
"You must choose either 'file_contents', or 'file_path'."
)
end

# Specs for the `provisioned_throughput` client option: validated for
# vertex-ai-api, silently ignored for generative-language-api.
describe 'provisioned throughput configuration' do
  let(:vertex_ai_config) do
    {
      credentials: {
        service: 'vertex-ai-api',
        project_id: 'test-project',
        region: 'us-central1',
        api_key: 'test-api-key'
      },
      options: { model: 'gemini-1.5-pro' }
    }
  end

  let(:generative_language_config) do
    {
      credentials: { service: 'generative-language-api', api_key: 'test-api-key' },
      options: { model: 'gemini-1.5-pro' }
    }
  end

  # Instantiates a client from a base config with extra options merged in.
  def build_client(base, extra_options)
    described_class.new(base.merge(options: base[:options].merge(extra_options)))
  end

  context 'with vertex-ai-api service' do
    it 'accepts valid provisioned throughput configurations' do
      %w[dedicated shared spillover].each do |value|
        expect { build_client(vertex_ai_config, provisioned_throughput: value) }.not_to raise_error
      end
    end

    it 'rejects invalid provisioned throughput configurations' do
      expect { build_client(vertex_ai_config, provisioned_throughput: 'invalid') }.to raise_error(
        Gemini::Errors::InvalidProvisionedThroughputError,
        "Invalid config 'invalid'. Must be one of: dedicated, shared, spillover"
      )
    end

    it 'rejects non-string provisioned throughput configurations' do
      expect { build_client(vertex_ai_config, provisioned_throughput: { config: 'dedicated' }) }.to raise_error(
        Gemini::Errors::InvalidProvisionedThroughputError,
        'provisioned_throughput must be a string with one of: dedicated, shared, spillover'
      )
    end

    it 'allows nil provisioned throughput configuration' do
      expect { described_class.new(vertex_ai_config) }.not_to raise_error
    end
  end

  context 'with generative-language-api service' do
    it 'ignores provisioned throughput configuration' do
      expect { build_client(generative_language_config, provisioned_throughput: 'dedicated') }.not_to raise_error
    end

    it 'ignores invalid provisioned throughput configuration' do
      expect { build_client(generative_language_config, provisioned_throughput: 'invalid') }.not_to raise_error
    end

    it 'ignores non-string provisioned throughput configuration' do
      expect { build_client(generative_language_config, provisioned_throughput: { config: 'dedicated' }) }.not_to raise_error
    end
  end
end
end