Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions LICENSE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,9 @@ com.squareup.retrofit2:retrofit
com.squareup.okhttp3:okhttp
org.apache.kafka:kafka-clients
org.xerial:sqlite-jdbc
com.openai:openai-java
com.github.victools:jsonschema-generator
com.github.victools:jsonschema-module-jackson

BSD
------------
Expand Down
3 changes: 2 additions & 1 deletion docs/configuration/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,8 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co
| kyuubi.engine.data.agent.max.iterations | 100 | The maximum number of ReAct loop iterations for the Data Agent engine. | int | 1.12.0 |
| kyuubi.engine.data.agent.memory | 1g | The heap memory for the Data Agent engine | string | 1.12.0 |
| kyuubi.engine.data.agent.provider | ECHO | The provider for the Data Agent engine. Candidates: <ul> <li>ECHO: simply echoes the input, for testing purposes.</li> <li>OPENAI_COMPATIBLE: OpenAI-compatible LLM provider.</li></ul> | string | 1.12.0 |
| kyuubi.engine.data.agent.query.timeout | PT5M | The query execution timeout for the Data Agent SQL tool. | duration | 1.12.0 |
| kyuubi.engine.data.agent.query.timeout | PT3M | The JDBC query execution timeout for the Data Agent SQL tools. Passed to <code>Statement.setQueryTimeout</code> so the server (Spark/Trino/...) can cooperatively cancel long-running queries and release cluster resources. Should be set lower than <code>kyuubi.engine.data.agent.tool.call.timeout</code> so server-side cancellation has time to react before the outer wall-clock cap fires. | duration | 1.12.0 |
| kyuubi.engine.data.agent.tool.call.timeout | PT5M | The maximum wall-clock execution time for any tool call in the Data Agent engine. Acts as the outer safety net enforced by the agent runtime via <code>Future.cancel()</code>, applied uniformly to every tool. For SQL tools the inner JDBC-level timeout is controlled separately by <code>kyuubi.engine.data.agent.query.timeout</code>, which should be set lower so server-side cancellation has time to react before this hard cap fires. | duration | 1.12.0 |
| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 |
| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 |
| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live (TTL) for the exception patterns specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits kyuubi.engine.deregister.job.max.failures within the TTL, the engine will deregister itself and wait to self-terminate. Otherwise, we assume that the engine has recovered from temporary failures. | duration | 1.2.0 |
Expand Down
27 changes: 27 additions & 0 deletions externals/kyuubi-data-agent-engine/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>com.openai</groupId>
<artifactId>openai-java</artifactId>
</dependency>

<dependency>
<groupId>com.github.victools</groupId>
<artifactId>jsonschema-generator</artifactId>
</dependency>

<dependency>
<groupId>com.github.victools</groupId>
<artifactId>jsonschema-module-jackson</artifactId>
</dependency>

<!-- test dependencies -->
<dependency>
<groupId>org.apache.kyuubi</groupId>
Expand All @@ -65,6 +80,18 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers-mysql</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.kyuubi.engine.dataagent.datasource;

import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import javax.sql.DataSource;

/**
 * Static factory that turns a JDBC URL into a HikariCP-pooled {@link DataSource}.
 *
 * <p>Every pool built here is named {@code kyuubi-data-agent}, is capped at five connections and
 * is configured with a minimum of one idle connection. The class is not instantiable.
 */
public final class DataSourceFactory {

  /** Upper bound on the number of connections a pool created by this factory may hold. */
  private static final int DEFAULT_MAX_POOL_SIZE = 5;

  private DataSourceFactory() {}

  /**
   * Builds a pooled DataSource using only a JDBC URL; no username or password is configured. Any
   * JDBC driver available on the classpath can be used.
   *
   * @param jdbcUrl the JDBC connection URL
   * @return a HikariCP-backed DataSource
   */
  public static DataSource create(String jdbcUrl) {
    return create(jdbcUrl, null, null);
  }

  /**
   * Builds a pooled DataSource for the given JDBC URL and username, without a password.
   *
   * <p>The username matters when the data-agent connects back to Kyuubi Server: it determines the
   * proxy user for the downstream engine (e.g. Spark). If it is omitted, Kyuubi defaults to
   * "anonymous", which typically fails Hadoop impersonation checks.
   *
   * @param jdbcUrl the JDBC connection URL
   * @param user the username for the JDBC connection, may be null
   * @return a HikariCP-backed DataSource
   */
  public static DataSource create(String jdbcUrl, String user) {
    return create(jdbcUrl, user, null);
  }

  /**
   * Builds a pooled DataSource for the given JDBC URL and explicit credentials.
   *
   * <p>Use this overload whenever a password is needed. Handing the password to {@link
   * HikariConfig#setPassword} keeps it out of the JDBC URL, where it would otherwise leak into log
   * lines, JMX pool metadata, exception messages, and connection strings printed by debug tooling.
   *
   * @param jdbcUrl the JDBC connection URL
   * @param user the username for the JDBC connection, may be null
   * @param password the password for the JDBC connection, may be null
   * @return a HikariCP-backed DataSource
   * @throws IllegalArgumentException if {@code jdbcUrl} is null or empty
   */
  public static DataSource create(String jdbcUrl, String user, String password) {
    if (!hasValue(jdbcUrl)) {
      throw new IllegalArgumentException("jdbcUrl must not be null or empty");
    }

    HikariConfig poolConfig = new HikariConfig();
    poolConfig.setJdbcUrl(jdbcUrl);

    // Null or empty credentials are simply left unset on the pool configuration.
    if (hasValue(user)) {
      poolConfig.setUsername(user);
    }
    if (hasValue(password)) {
      poolConfig.setPassword(password);
    }

    poolConfig.setMaximumPoolSize(DEFAULT_MAX_POOL_SIZE);
    poolConfig.setMinimumIdle(1);
    // Negative timeout: per the HikariCP documentation the pool skips the initial connection
    // attempt, so constructing the DataSource does not fail while the database is unreachable.
    poolConfig.setInitializationFailTimeout(-1);
    poolConfig.setPoolName("kyuubi-data-agent");

    return new HikariDataSource(poolConfig);
  }

  /** Returns true when the given string is neither null nor empty. */
  private static boolean hasValue(String text) {
    return text != null && !text.isEmpty();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.kyuubi.engine.dataagent.datasource;

/**
 * Catch-all {@link JdbcDialect} used for JDBC subprotocols that have no dedicated implementation.
 *
 * <p>It only remembers the subprotocol name (e.g. "postgresql", "clickhouse") so prompts can still
 * tell the LLM which SQL flavor it is talking to. Identifier quoting is deliberately not provided:
 * {@link #quoteIdentifier(String)} always throws, so callers that need quoting must check the
 * dialect type first or pick a tool that does not depend on dialect-specific identifier quoting.
 */
public final class GenericDialect implements JdbcDialect {

  /** JDBC subprotocol name this dialect stands in for; stored exactly as supplied. */
  private final String subprotocol;

  /**
   * Creates a generic dialect labelled with the given subprotocol name.
   *
   * @param name the JDBC subprotocol name reported by {@link #datasourceName()}
   */
  public GenericDialect(String name) {
    this.subprotocol = name;
  }

  /**
   * Returns the subprotocol name passed to the constructor.
   *
   * @return the name this dialect was created with
   */
  @Override
  public String datasourceName() {
    return subprotocol;
  }

  /**
   * Always fails, because the generic dialect does not support identifier quoting.
   *
   * @param identifier the identifier to quote (never used)
   * @return never returns normally
   * @throws UnsupportedOperationException on every call
   */
  @Override
  public String quoteIdentifier(String identifier) {
    String message = "quoteIdentifier is not supported for generic dialect: " + subprotocol;
    throw new UnsupportedOperationException(message);
  }
}
Loading
Loading