Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions build/Dockerfile.videxserver
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Base image: slim Debian-based Python 3.11.
FROM python:3.11-slim

# Runtime environment for Python and pip.
# VIDEX_CONTAINER=1 is read by the container entrypoint as an explicit
# "running inside a container" hint (used only for warnings).
# PYTHONPATH points at the directory that src/ is copied into below.
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_NO_CACHE_DIR=1 \
VIDEX_CONTAINER=1 \
PYTHONPATH=/opt/videx/src

WORKDIR /opt/videx

# Copy only the dependency manifest first, so the dependency-install layer
# can be reused from the build cache when just the sources change.
COPY requirements.txt /opt/videx/requirements.txt
Comment thread
kr11 marked this conversation as resolved.
# Install the compiler toolchain, install the Python requirements, then purge
# the toolchain and the apt package lists again -- all in a single layer so
# the build-only packages do not remain in the final image.
# NOTE(review): gcc/python3-dev are presumably needed because some requirement
# is built from source -- confirm against requirements.txt.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
gcc \
python3-dev \
&& pip install -r /opt/videx/requirements.txt \
&& apt-get purge -y --auto-remove gcc python3-dev \
&& rm -rf /var/lib/apt/lists/*

# Application sources (on PYTHONPATH) and the container entrypoint script.
COPY src/ /opt/videx/src/
COPY build/videx_container_entrypoint.py /opt/videx/videx_container_entrypoint.py

# Default/documentation port. You can still map any host port to container 5001 via -p HOST:5001.
EXPOSE 5001

# Exec-form entrypoint; the default command selects the entrypoint's `server`
# mode. Passing `sync ...` as the container command selects the one-shot mode.
ENTRYPOINT ["python", "/opt/videx/videx_container_entrypoint.py"]
CMD ["server"]
183 changes: 183 additions & 0 deletions build/videx_container_entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""
VIDEX container entrypoint.

This entrypoint provides two modes:
- `server`: start the long-running VIDEX stats server (default)
- `sync`: run the one-shot sync/env build script and exit

Design notes:
- Keep argument handling minimal.
- Do not rewrite user arguments.
- Best-effort warnings are emitted for common container networking pitfalls
(e.g. using localhost/127.0.0.1 in --target inside a container).
"""

from __future__ import annotations

import os
import sys
import subprocess
from typing import List, Optional, Tuple

# Host spellings treated as "this machine" when deciding whether to warn
# about the --target value passed to `sync`.
LOCALHOST_NAMES = {"127.0.0.1", "localhost", "::1"}


def _in_container_best_effort() -> bool:
    """
    Guess whether this process is running inside a container.

    The result only gates warnings; it is never used to rewrite arguments
    or to fail. Checks, in order:
      1. ``VIDEX_CONTAINER`` is set to a non-empty value other than
         "0" / "false" / "no" (surrounding whitespace and case ignored).
      2. ``/.dockerenv`` exists.
      3. The ``container`` environment variable is non-empty.
      4. ``/proc/1/cgroup`` mentions docker, containerd, kubepods or podman.

    Returns False when none of the checks match or when ``/proc/1/cgroup``
    cannot be read.
    """
    flag = os.environ.get("VIDEX_CONTAINER")
    if flag:
        normalized = flag.strip().lower()
        if normalized != "0" and normalized != "false" and normalized != "no":
            return True

    if os.path.exists("/.dockerenv") or os.environ.get("container"):
        return True

    try:
        with open("/proc/1/cgroup", "rt", encoding="utf-8") as cgroup_file:
            contents = cgroup_file.read()
    except OSError:
        # An unreadable or missing cgroup file simply means "no evidence".
        return False

    for marker in ("docker", "containerd", "kubepods", "podman"):
        if marker in contents:
            return True
    return False


def _usage() -> str:
    """Return the multi-line help text describing the `server` and `sync` commands."""
    lines = [
        "Usage:\n",
        " <image> [server]\n",
        " <image> sync --target HOST:PORT:DB:USER:PASS [--videx ...] [other args]\n",
        "\n",
        "Commands:\n",
        " server Start VIDEX server (default).\n",
        " sync Run one-shot scripts to collect metadata from --target, then add metadata into videx-server, and create virtual tables in --videx.\n",
        "\n",
        "Notes:\n",
        " In a container, 127.0.0.1/localhost refers to the container itself.\n",
        " See doc/VIDEX_SERVER_DOCKER.md for Docker networking tips.\n",
    ]
    return "".join(lines)


def _extract_flag_value(argv: List[str], name: str) -> Tuple[Optional[str], bool]:
    """
    Look up the first occurrence of a CLI flag in ``argv``.

    Both spellings are recognised:
        --name value
        --name=value

    Returns a ``(value, present)`` pair:
        - ``(None, False)``  the flag does not appear at all
        - ``(None, True)``   the flag appears but is the last token, or the
                             token after it starts with "--"
        - ``(value, True)``  the flag appears with a value
    """
    inline_prefix = name + "="
    for index, token in enumerate(argv):
        if token.startswith(inline_prefix):
            # "--name=value": everything after the first "=" is the value.
            return token.partition("=")[2], True
        if token != name:
            continue
        # "--name value": peek at the next token without risking IndexError.
        following = argv[index + 1 : index + 2]
        if following and not following[0].startswith("--"):
            return following[0], True
        return None, True
    return None, False


def _parse_target_host(target: str) -> Optional[str]:
    """
    Extract the host part of a connection string of the form:
        host:port:db:user:password

    Only the host is needed (for warnings), so nothing else is validated.

    IPv6 hosts contain ":" themselves, so two extra spellings are handled
    in addition to the plain left-most split:
        [::1]:port:db:user:password   (bracketed literal)
        ::1:port:db:user:password     (bare literal starting with ":")

    Returns the host with surrounding whitespace removed, or None when
    ``target`` is empty, contains no ":", or yields an empty host.
    """
    if not target or ":" not in target:
        return None

    stripped = target.strip()
    if stripped.startswith("["):
        closing = stripped.find("]")
        if closing != -1:
            bracketed = stripped[1:closing].strip()
            if bracketed:
                return bracketed

    host = target.split(":", 1)[0].strip()
    if host:
        return host

    # An empty first field cannot be a real host; it most likely is a bare
    # IPv6 literal such as "::1". Anchor on the four trailing fields instead.
    fields = target.rsplit(":", 4)
    if len(fields) == 5:
        return fields[0].strip() or None
    return None


def _maybe_warn_localhost_target(argv: List[str]) -> None:
    """
    Write best-effort warnings about the `sync` command's --target flag to stderr.

    Never rewrites ``argv`` and never fails. One of three warnings may be
    printed:
        - --target is absent;
        - --target is present but has no value (the usage text is appended);
        - --target's host is a localhost spelling and the process appears to
          be running in a container.

    The host comparison is case-insensitive, so e.g. "LOCALHOST" is treated
    the same as "localhost".
    """
    target, present = _extract_flag_value(argv, "--target")

    if not present:
        sys.stderr.write(
            "Warning: 'sync' usually needs --target HOST:PORT:DB:USER:PASS.\n"
            " The sync script will likely fail without it.\n\n"
        )
        return

    if target is None:
        sys.stderr.write(
            "Warning: '--target' flag is present but has no value.\n"
            " The sync script will likely fail. Usage:\n\n"
            f"{_usage()}\n"
        )
        return

    host = _parse_target_host(target)
    # Host names are case-insensitive; normalise before the membership test.
    if not host or host.lower() not in LOCALHOST_NAMES:
        return

    # localhost is only suspicious when we are (probably) inside a container.
    if not _in_container_best_effort():
        return

    sys.stderr.write(
        "Warning: You may be running in a container, but the `--target` parameter is configured with 127.0.0.1/localhost.\n"
        " In a container, localhost usually refers to the container itself.\n"
        " If your MariaDB/VIDEX runs on the host machine, this may fail.\n\n"
        "Suggestions:\n"
        " - Docker Desktop (Mac/Windows): try host.docker.internal in --target.\n"
        " - Linux Docker Engine: add this when running the container:\n"
        " --add-host=host.docker.internal:host-gateway\n"
        " then use host.docker.internal in --target.\n"
        " - If DB runs in the same container / same network namespace, localhost can be correct.\n\n"
    )


def _run_module(module: str, argv: List[str]) -> int:
    """Run ``<current interpreter> -m <module> <argv...>`` and return the child's exit status."""
    command = [sys.executable, "-m", module]
    command.extend(argv)
    return subprocess.call(command)


def _run_server(argv: List[str]) -> int:
    """Launch the VIDEX server start script as a module, forwarding ``argv`` unchanged."""
    # Equivalent to: python -m sub_platforms.sql_opt.videx.scripts.start_videx_server ...
    server_module = "sub_platforms.sql_opt.videx.scripts.start_videx_server"
    return _run_module(server_module, argv)


def _run_sync(argv: List[str]) -> int:
    """Launch the one-shot env build (sync) script as a module, forwarding ``argv`` unchanged."""
    # Equivalent to: python -m sub_platforms.sql_opt.videx.scripts.videx_build_env ...
    sync_module = "sub_platforms.sql_opt.videx.scripts.videx_build_env"
    return _run_module(sync_module, argv)


def main() -> int:
    """
    Dispatch on the first command-line argument and return a process exit code.

    - no arguments          -> start the server with no extra arguments
    - -h / --help / help    -> print usage to stdout, return 0
    - server [args...]      -> start the server with the remaining arguments
    - sync [args...]        -> emit --target warnings, then run the sync script
    - anything starting "-" -> treated as server arguments (subcommand omitted)
    - anything else         -> print an error plus usage to stderr, return 2
    """
    args = sys.argv[1:]
    if not args:
        return _run_server([])

    subcmd, rest = args[0], args[1:]

    if subcmd in ("-h", "--help", "help"):
        sys.stdout.write(_usage())
        return 0

    if subcmd == "server":
        return _run_server(rest)

    if subcmd == "sync":
        _maybe_warn_localhost_target(rest)
        return _run_sync(rest)

    # Convenience: flags given without an explicit 'server' go to the server.
    if subcmd.startswith("-"):
        return _run_server(args)

    sys.stderr.write(f"Error: unknown command '{subcmd}'.\n\n{_usage()}\n")
    return 2


if __name__ == "__main__":
raise SystemExit(main())
117 changes: 117 additions & 0 deletions doc/VIDEX_SERVER_DOCKER.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
## VIDEX-Server Docker Image Description

The latest public image is:

- `ghcr.io/bytedance/videx-server:0.2.0-preview-test1` (GHCR)

This image supports two entrypoint modes:

- `server` (default): start the VIDEX server
- `sync`: run a one-shot workflow to collect metadata from `--target`, then add metadata into `videx-server`, and create virtual tables in `--videx`

> Recommendation: prefer using a routable IP address (your host/server IP) instead of `localhost/127.0.0.1`,
> to make sure `videx-server` (running in a container) can be reached by `videx-sync` and MariaDB-VIDEX (including `videx-plugin`).
> This is especially important because MariaDB-VIDEX also needs to reach `videx-server`, e.g.:
>
> `SET SESSION VIDEX_SERVER_IP=<VIDEX_SERVER_IP>:<VIDEX_SERVER_PORT>;`

---

## Build image

Build locally from this repo and tag it as `videx-server:0.2.0`:

```bash
docker build -f build/Dockerfile.videxserver -t videx-server:0.2.0 .
```

---

## Quick start

Suppose your machine/server IP is `203.0.113.42` (example only).

### 1) Start the videx-server

Publish container port `5001` on a host port (any free host port works; this example uses 5001):

```bash
docker run -d --name videx-server \
-p 5001:5001 \
ghcr.io/bytedance/videx-server:0.2.0-preview-test1
```

Then open:

- `http://203.0.113.42:5001`
- `http://localhost:5001` (only if you are on the same machine)

---

### 2) Run sync (one-shot) against MariaDB (recommended: use host/server IP)

`sync` connects to `--target` (your MariaDB), collects metadata, writes metadata into `videx-server`, and creates virtual tables in `--videx`.

#### Command template

```bash
docker run --rm --name videx-sync \
ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
--target <TARGET_HOST>:<TARGET_PORT>:<TARGET_DB>:<TARGET_USER>:<TARGET_PASS> \
[--videx <VIDEX_HOST>:<VIDEX_PORT>:<VIDEX_DB>:<VIDEX_USER>:<VIDEX_PASS>] \
[--videx_server <VIDEX_SERVER_HOST>:<VIDEX_SERVER_PORT>]
```

#### Example (fake IP shown)

Suppose:

- Your machine/server IP is `203.0.113.42` (example only)
- MariaDB is reachable at `203.0.113.42:15508`
- Source database is `tpch_tiny`
- User/password: `videx` / `password`
- `videx-server` is reachable at `203.0.113.42:5001`

Run:

```bash
docker run --rm --name videx-sync \
ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
--target 203.0.113.42:15508:tpch_tiny:videx:password \
--videx 203.0.113.42:15508:videx_tpch_tiny:videx:password \
--videx_server 203.0.113.42:5001
```

#### Notes

1. If `--videx` is not specified, a default database `videx_{TARGET_DB}` will be created in `--target`.
2. If your videx-server is not the default `203.0.113.42:5001`, pass:
- `--videx_server <VIDEX_SERVER_HOST>:<VIDEX_SERVER_PORT>`
3. Because MariaDB-VIDEX needs to call back into `videx-server`, you should configure a reachable server address, for example:
```sql
SET SESSION VIDEX_SERVER_IP=<VIDEX_SERVER_IP>:<VIDEX_SERVER_PORT>;
```
This is another reason why using a routable IP (not `localhost`) is recommended.

---

## FAQ

### Q1: I used `localhost` / `127.0.0.1` in `--target` and it failed. Why?

Inside a container, `localhost/127.0.0.1` refers to the container itself. If MariaDB runs on the Docker host (or elsewhere), the container cannot reach it via `localhost`.

**Linux (Docker Engine) quick fix: use `host.docker.internal` via `--add-host`**

```bash
docker run --rm --name videx-sync \
--add-host=host.docker.internal:host-gateway \
ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
--target host.docker.internal:<PORT>:<DB>:<USER>:<PASS> \
--videx host.docker.internal:<PORT>:<VIDEX_DB>:<VIDEX_USER>:<VIDEX_PASS> \
--videx_server host.docker.internal:<VIDEX_SERVER_PORT>
```

However, you must ensure that MariaDB-VIDEX can still reach `videx-server`;
things get tricky if MariaDB-VIDEX itself is also running inside a container.
**In that case, using a routable IP is the most recommended way to ensure reachability.**
Loading