bytedance · kr11 · Jan 31, 2026 · Jan 18, 2026 · Jan 18, 2026 · Jan 28, 2026
diff --git a/build/Dockerfile.videxserver b/build/Dockerfile.videxserver
@@ -0,0 +1,28 @@
+# Image for the VIDEX stats server and the one-shot `sync` job (both are started
+# through videx_container_entrypoint.py).
+FROM python:3.11-slim
+
+# Runtime settings:
+#   PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED - no .pyc files; unbuffered stdout/stderr.
+#   PIP_DISABLE_PIP_VERSION_CHECK / PIP_NO_CACHE_DIR - no pip version check; no pip cache in the image.
+#   VIDEX_CONTAINER - read by the entrypoint's container detection (used for localhost warnings).
+#   PYTHONPATH - makes the sources copied to /opt/videx/src importable for `python -m ...`.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1 \
+    VIDEX_CONTAINER=1 \
+    PYTHONPATH=/opt/videx/src
+
+WORKDIR /opt/videx
+
+# requirements.txt is copied on its own, before the sources, so the dependency
+# layer below can be reused from cache when only src/ changes.
+COPY requirements.txt /opt/videx/requirements.txt
+# gcc and python3-dev are installed only for the duration of `pip install`
+# (presumably for dependencies that build C extensions - confirm against
+# requirements.txt) and are purged, together with the apt lists, in the same
+# layer so they do not remain in the final image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        gcc \
+        python3-dev \
+    && pip install -r /opt/videx/requirements.txt \
+    && apt-get purge -y --auto-remove gcc python3-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY src/ /opt/videx/src/
+COPY build/videx_container_entrypoint.py /opt/videx/videx_container_entrypoint.py
+
+# Default/documentation port. You can still map any host port to container 5001 via -p HOST:5001.
+EXPOSE 5001
+
+# The entrypoint script receives CMD as its arguments; `server` is the default
+# and is replaced by whatever is passed after the image name in `docker run`.
+ENTRYPOINT ["python", "/opt/videx/videx_container_entrypoint.py"]
+CMD ["server"]
diff --git a/build/videx_container_entrypoint.py b/build/videx_container_entrypoint.py
@@ -0,0 +1,183 @@
+"""
+VIDEX container entrypoint.
+
+This entrypoint provides two modes:
+- `server`: start the long-running VIDEX stats server (default)
+- `sync`:   run the one-shot sync/env build script and exit
+
+Design notes:
+- Keep argument handling minimal.
+- Do not rewrite user arguments.
+- Best-effort warnings are emitted for common container networking pitfalls
+  (e.g. using localhost/127.0.0.1 in --target inside a container).
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+import subprocess
+from typing import List, Optional, Tuple
+
+# Names/addresses that denote the local machine; compared against the host part of --target.
+LOCALHOST_NAMES = {"127.0.0.1", "localhost", "::1"}
+
+
+def _in_container_best_effort() -> bool:
+    """
+    Guess whether this process is running inside a container.
+
+    Signals are checked in order: the VIDEX_CONTAINER environment variable
+    (any non-empty value other than 0/false/no), the /.dockerenv marker file,
+    the `container` environment variable, and finally container runtime names
+    found in /proc/1/cgroup. The result is only used to decide whether to
+    print warnings (never for rewriting args or failing).
+    """
+    falsy_values = {"0", "false", "no"}
+    override = os.environ.get("VIDEX_CONTAINER")
+    if override and override.strip().lower() not in falsy_values:
+        return True
+
+    if os.path.exists("/.dockerenv") or os.environ.get("container"):
+        return True
+
+    try:
+        with open("/proc/1/cgroup", "rt", encoding="utf-8") as cgroup_file:
+            cgroup_text = cgroup_file.read()
+    except OSError:
+        # Missing or unreadable cgroup file: treat as "not a container".
+        return False
+
+    for runtime in ("docker", "containerd", "kubepods", "podman"):
+        if runtime in cgroup_text:
+            return True
+    return False
+
+
+def _usage() -> str:
+    """Return the multi-line help text of the container entrypoint."""
+    usage_lines = [
+        "Usage:",
+        "  <image> [server]",
+        "  <image> sync --target HOST:PORT:DB:USER:PASS [--videx ...] [other args]",
+        "",
+        "Commands:",
+        "  server   Start VIDEX server (default).",
+        "  sync     Run one-shot scripts to collect metadata from --target, then add metadata into videx-server, and create virtual tables in --videx.",
+        "",
+        "Notes:",
+        "  In a container, 127.0.0.1/localhost refers to the container itself.",
+        "  See doc/VIDEX_SERVER_DOCKER.md for Docker networking tips.",
+    ]
+    # Every line, including the last one, is newline-terminated.
+    return "".join(line + "\n" for line in usage_lines)
+
+
+def _extract_flag_value(argv: List[str], name: str) -> Tuple[Optional[str], bool]:
+    """
+    Look up the first occurrence of a CLI flag in argv.
+
+    Both spellings are understood:
+      --name value
+      --name=value
+
+    Returns a (value, present) pair:
+      - (None, False)  => the flag does not occur in argv
+      - (None, True)   => the flag occurs, but the next token is missing or
+                          is itself another `--` option
+      - (value, True)  => the flag occurs with a value
+    """
+    inline_prefix = name + "="
+    for pos, arg in enumerate(argv):
+        if arg.startswith(inline_prefix):
+            # --name=value: everything after the first "=" is the value.
+            return arg.partition("=")[2], True
+        if arg != name:
+            continue
+        following = argv[pos + 1 : pos + 2]
+        if following and not following[0].startswith("--"):
+            return following[0], True
+        return None, True
+    return None, False
+
+
+def _parse_target_host(target: str) -> Optional[str]:
+    """
+    Parse host from a connection string of form:
+      host:port:db:user:password
+
+    An IPv6 host may be written in brackets ("[::1]:3306:db:user:pass"); the
+    brackets are dropped so the result can be compared with names such as
+    "::1". A bare (unbracketed) IPv6 host cannot be told apart from the field
+    separators and is not recognised.
+
+    We only need host for warnings, so do not over-validate.
+    Returns None if the format is unexpected or the host part is empty.
+    """
+    if not target or ":" not in target:
+        return None
+
+    candidate = target.lstrip()
+    if candidate.startswith("["):
+        # Bracketed host: take everything up to the "]" that precedes the
+        # next field separator, so colons inside the address are preserved.
+        end = candidate.find("]:")
+        if end != -1:
+            return candidate[1:end].strip() or None
+
+    host = target.split(":", 1)[0].strip()
+    return host or None
+
+
+def _maybe_warn_localhost_target(argv: List[str]) -> None:
+    """
+    Print best-effort warnings about using a loopback --target inside containers.
+    No rewriting; no hard failure.
+
+    A warning is written to stderr when --target is absent, when it has no
+    value, or when its host is a loopback name/address and the process seems
+    to run in a container. The host check is case-insensitive and accepts any
+    literal loopback IP (e.g. 127.0.0.2), not only the entries of
+    LOCALHOST_NAMES.
+    """
+    import ipaddress  # local import: only this warning path needs it
+
+    target, present = _extract_flag_value(argv, "--target")
+
+    if not present:
+        sys.stderr.write(
+            "Warning: 'sync' usually needs --target HOST:PORT:DB:USER:PASS.\n"
+            "         The sync script will likely fail without it.\n\n"
+        )
+        return
+
+    if target is None:
+        sys.stderr.write(
+            "Warning: '--target' flag is present but has no value.\n"
+            "         The sync script will likely fail. Usage:\n\n"
+            f"{_usage()}\n"
+        )
+        return
+
+    host = _parse_target_host(target)
+    if not host:
+        return
+
+    # Host names are case-insensitive, so "LOCALHOST" must match as well.
+    normalized = host.lower()
+    is_loopback = normalized in LOCALHOST_NAMES
+    if not is_loopback:
+        try:
+            # Covers the whole 127.0.0.0/8 range and IPv6 loopback spellings.
+            is_loopback = ipaddress.ip_address(normalized).is_loopback
+        except ValueError:
+            # Not an IP literal, i.e. an ordinary host name: nothing to warn about.
+            is_loopback = False
+    if not is_loopback:
+        return
+
+    if not _in_container_best_effort():
+        return
+
+    sys.stderr.write(
+        "Warning: You may be running in a container, but the `--target` parameter is configured with 127.0.0.1/localhost.\n"
+        "         In a container, localhost usually refers to the container itself.\n"
+        "         If your MariaDB/VIDEX runs on the host machine, this may fail.\n\n"
+        "Suggestions:\n"
+        "  - Docker Desktop (Mac/Windows): try host.docker.internal in --target.\n"
+        "  - Linux Docker Engine: add this when running the container:\n"
+        "      --add-host=host.docker.internal:host-gateway\n"
+        "    then use host.docker.internal in --target.\n"
+        "  - If DB runs in the same container / same network namespace, localhost can be correct.\n\n"
+    )
+
+
+def _run_module(module: str, argv: List[str]) -> int:
+    """
+    Run `python -m <module> <argv...>` with the current interpreter and wait for it.
+
+    Returns the child's exit status. subprocess reports a child terminated by
+    signal N as -N; that is mapped to 128 + N (the shell convention, e.g. 137
+    for SIGKILL, 143 for SIGTERM) so that the value stays a meaningful exit
+    code when it is passed on to SystemExit.
+    """
+    cmd = [sys.executable, "-m", module] + argv
+    returncode = subprocess.call(cmd)
+    if returncode < 0:
+        return 128 - returncode
+    return returncode
+
+
+def _run_server(argv: List[str]) -> int:
+    """Start the VIDEX server module with argv forwarded unchanged; return _run_module's result."""
+    # Runs: python -m sub_platforms.sql_opt.videx.scripts.start_videx_server ...
+    return _run_module("sub_platforms.sql_opt.videx.scripts.start_videx_server", argv)
+
+
+def _run_sync(argv: List[str]) -> int:
+    """Run the one-shot build-env (sync) module with argv forwarded unchanged; return _run_module's result."""
+    # Runs: python -m sub_platforms.sql_opt.videx.scripts.videx_build_env ...
+    return _run_module("sub_platforms.sql_opt.videx.scripts.videx_build_env", argv)
+
+
+def main() -> int:
+    """
+    Dispatch on the first command-line argument and return the exit code.
+
+    - no arguments          -> start the server without extra arguments
+    - -h / --help / help    -> print the usage text, return 0
+    - server [args]         -> start the server with args
+    - sync [args]           -> print --target warnings, then run the sync script
+    - anything starting "-" -> treated as server arguments
+    - anything else         -> error message plus usage on stderr, return 2
+    """
+    args = sys.argv[1:]
+    if not args:
+        return _run_server([])
+
+    command, rest = args[0], args[1:]
+
+    if command in {"-h", "--help", "help"}:
+        sys.stdout.write(_usage())
+        return 0
+
+    if command == "sync":
+        _maybe_warn_localhost_target(rest)
+        return _run_sync(rest)
+
+    if command == "server":
+        return _run_server(rest)
+
+    if command.startswith("-"):
+        # No explicit subcommand: the whole argument list belongs to the server.
+        return _run_server(args)
+
+    sys.stderr.write(f"Error: unknown command '{command}'.\n\n{_usage()}\n")
+    return 2
+
+
+# Script entry: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/doc/VIDEX_SERVER_DOCKER.md b/doc/VIDEX_SERVER_DOCKER.md
@@ -0,0 +1,117 @@
+## VIDEX-Server Docker Image Description
+
+The latest public image is:
+
+- `ghcr.io/bytedance/videx-server:0.2.0-preview-test1` (GHCR)
+
+This image supports two entrypoint modes:
+
+- `server` (default): start the VIDEX server
+- `sync`: run a one-shot workflow to collect metadata from `--target`, then add metadata into `videx-server`, and create virtual tables in `--videx`
+
+> Recommendation: prefer using a routable IP address (your host/server IP) instead of `localhost/127.0.0.1`, 
+> to make sure `videx-server` (running in a container) can be reached by `videx-sync` and MariaDB-VIDEX (including `videx-plugin`).  
+> This is especially important because MariaDB-VIDEX also needs to reach `videx-server`, e.g.:
+>
+> `SET SESSION VIDEX_SERVER_IP=<VIDEX_SERVER_IP>:<VIDEX_SERVER_PORT>;`
+
+---
+
+## Build image
+
+Build locally from this repo and tag it as `videx-server:0.2.0`:
+
+```bash
+docker build -f build/Dockerfile.videxserver -t videx-server:0.2.0 .
+```
+
+---
+
+## Quick start
+
+Suppose your machine/server IP is `203.0.113.42` (example only).
+
+### 1) Start the videx-server
+
+Publish container port `5001` on a host port (any free host port works; this example uses `5001`):
+
+```bash
+docker run -d --name videx-server \
+  -p 5001:5001 \
+  ghcr.io/bytedance/videx-server:0.2.0-preview-test1
+```
+
+Then open:
+
+- `http://203.0.113.42:5001`
+- `http://localhost:5001` (only if you are on the same machine)
+
+---
+
+### 2) Run sync (one-shot) against MariaDB (recommended: use host/server IP)
+
+`sync` connects to `--target` (your MariaDB), collects metadata, writes metadata into `videx-server`, and creates virtual tables in `--videx`.
+
+#### Command template
+
+```bash
+docker run --rm --name videx-sync \
+  ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
+  --target <TARGET_HOST>:<TARGET_PORT>:<TARGET_DB>:<TARGET_USER>:<TARGET_PASS> \
+  [--videx <VIDEX_HOST>:<VIDEX_PORT>:<VIDEX_DB>:<VIDEX_USER>:<VIDEX_PASS>] \
+  [--videx_server <VIDEX_SERVER_HOST>:<VIDEX_SERVER_PORT>]
+```
+
+#### Example (fake IP shown)
+
+Suppose:
+
+- Your machine/server IP is `203.0.113.42` (example only)
+- MariaDB is reachable at `203.0.113.42:15508`
+- Source database is `tpch_tiny`
+- User/password: `videx` / `password`
+- `videx-server` is reachable at `203.0.113.42:5001`
+
+Run:
+
+```bash
+docker run --rm --name videx-sync \
+  ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
+  --target 203.0.113.42:15508:tpch_tiny:videx:password \
+  --videx 203.0.113.42:15508:videx_tpch_tiny:videx:password \
+  --videx_server 203.0.113.42:5001
+```
+
+#### Notes
+
+1. If `--videx` is not specified, a default database `videx_{TARGET_DB}` will be created in `--target`.
+2. If your `videx-server` is not reachable at the sync script's default address, pass its address explicitly (the example above uses `203.0.113.42:5001`):
+   - `--videx_server <VIDEX_SERVER_HOST>:<VIDEX_SERVER_PORT>`
+3. Because MariaDB-VIDEX needs to call back into `videx-server`, you should configure a reachable server address, for example:
+   ```sql
+   SET SESSION VIDEX_SERVER_IP=<VIDEX_SERVER_IP>:<VIDEX_SERVER_PORT>;
+   ```
+   This is another reason why using a routable IP (not `localhost`) is recommended.
+
+---
+
+## FAQ
+
+### Q1: I used `localhost` / `127.0.0.1` in `--target` and it failed. Why?
+
+Inside a container, `localhost/127.0.0.1` refers to the container itself. If MariaDB runs on the Docker host (or elsewhere), the container cannot reach it via `localhost`.
+
+**Linux (Docker Engine) quick fix: use `host.docker.internal` via `--add-host`**
+
+```bash
+docker run --rm --name videx-sync \
+  --add-host=host.docker.internal:host-gateway \
+  ghcr.io/bytedance/videx-server:0.2.0-preview-test1 sync \
+  --target host.docker.internal:<PORT>:<DB>:<USER>:<PASS> \
+  --videx host.docker.internal:<PORT>:<VIDEX_DB>:<VIDEX_USER>:<VIDEX_PASS> \
+  --videx_server host.docker.internal:<VIDEX_SERVER_PORT>
+```
+
+However, you must ensure that MariaDB-VIDEX can still reach `videx-server`; 
+things get tricky if MariaDB-VIDEX itself is also running inside a container. 
+**In that case, using a routable IP is the most recommended way to ensure reachability.**