Merge pull request #1024 from occupytheweb/913_inference_server_docker_setup

inference: Define container images and compose descriptor for the inference stack
2026-06-27 16:10:30 +08:00 · 2023-01-31 20:33:45 +04:00
parent af0ae71bbe e59b73d942
commit bc8d352bf4
7 changed files with 367 additions and 3 deletions
@@ -51,6 +51,10 @@ docker compose up --build
 Then, navigate to `http://localhost:3000` (It may take some time to boot up) and
 interact with the website.

+> **Note:** If an issue occurs with the build, please head to the
+> [FAQ](https://projects.laion.ai/Open-Assistant/docs/faq) and check out the
+> entries about Docker.
+
 > **Note:** When logging in via email, navigate to `http://localhost:1080` to
 > get the magic email login link.

@@ -131,3 +131,61 @@ services:
    ports:
      - "3000:3000"
    command: bash wait-for-postgres.sh node server.js
+
+  # Inference API server (dev image target); only part of the "inference" profile.
+  inference-server:
+    image: oasst-inference-server:dev
+    build:
+      context: .
+      dockerfile: docker/inference/Dockerfile.server
+      target: dev
+    profiles:
+      - inference
+    restart: unless-stopped
+    depends_on:
+      redis:
+        condition: service_healthy
+    environment:
+      PORT: "8000"
+      REDIS_HOST: redis
+    volumes:
+      # Bind-mount the sources so local changes are picked up without a rebuild.
+      - "./oasst-shared:/opt/inference/lib/oasst-shared"
+      - "./inference/server:/opt/inference/server"
+
+  # Inference worker (dev image target); talks to the inference server and to
+  # the text-generation backend through the URLs below.
+  inference-worker:
+    image: oasst-inference-worker:dev
+    build:
+      context: .
+      dockerfile: docker/inference/Dockerfile.worker
+      target: dev
+    profiles:
+      - inference
+    depends_on:
+      - inference-server
+    deploy:
+      # Raise this to run more workers side by side.
+      replicas: 1
+    environment:
+      BACKEND_URL: "ws://inference-server:8000"
+      INFERENCE_SERVER_URL: "http://inference-text-generation-server"
+    volumes:
+      # Bind-mount the sources so local changes are picked up without a rebuild.
+      - "./oasst-shared:/opt/inference/lib/oasst-shared"
+      - "./inference/worker:/opt/inference/worker"
+
+  # Interactive text client for chatting with the inference stack; attach to it
+  # with `docker attach`.
+  inference-text-client:
+    build:
+      dockerfile: docker/inference/Dockerfile.text-client
+      context: .
+    image: oasst-inference-text-client
+    environment:
+      - "BACKEND_URL=http://inference-server:8000"
+    # Keep a TTY and stdin open so the container can be attached to.
+    tty: true
+    stdin_open: true
+    volumes:
+      # Mount the text-client sources over the image's app root
+      # (/opt/inference/text-client, see Dockerfile.text-client) rather than
+      # the worker's directory, so local edits to the client take effect.
+      - "./inference/text-client:/opt/inference/text-client"
+    restart: unless-stopped
+    depends_on:
+      - inference-server
+    profiles: ["inference"]
+
+  # Text-generation backend; the worker's INFERENCE_SERVER_URL points at this
+  # service.
+  inference-text-generation-server:
+    image: ykilcher/text-generation-inference
+    profiles:
+      - inference
+    environment:
+      MODEL_ID: distilgpt2
@@ -0,0 +1,90 @@
+# syntax=docker/dockerfile:1
+
+# Global build arguments. Each stage re-declares the ones it needs with a bare
+# `ARG <name>` to bring them into its own scope.
+ARG MODULE="inference"
+ARG SERVICE="server"
+
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+
+# Build stage: installs the service's requirements into /build/lib.
+# The base image must match the runtime stage (base-env): the installed
+# packages are copied there as-is, and packages with compiled extensions built
+# against a different libc or Python version would fail to load.
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's download cache across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install                  \
+      --cache-dir=/var/cache/pip \
+      --target=lib               \
+      -r requirements.txt
+
+
+
+# Runtime base shared by the dev and prod stages.
+FROM python:3.10-alpine3.17 AS base-env
+ARG MODULE
+ARG SERVICE
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+
+# Filesystem layout of the service inside the image.
+ENV APP_BASE="/opt/${MODULE}" \
+    APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}" \
+    SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+# Make the packages installed under APP_LIBS importable and their scripts
+# runnable; PORT is the default port passed to uvicorn.
+ENV PATH="${PATH}:${APP_LIBS}/bin" \
+    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}" \
+    PORT="8000"
+
+
+# Dedicated unprivileged account to run the service.
+RUN adduser --disabled-password --no-create-home "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+# Dependencies come from the build stage; the entry point from the build context.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .
+
+
+
+# Development image: oasst-shared is installed in editable mode and the server
+# runs with auto-reload.
+FROM base-env AS dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+# The editable install runs as root; switch back to the app user afterwards.
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+    pip install --cache-dir=/var/cache/pip -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+VOLUME ["${APP_BASE}/lib/oasst-shared"]
+
+
+# Shell form so that ${PORT} is expanded when the container starts.
+CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"
+
+
+
+# Production image: oasst-shared is installed as a regular package next to the
+# other dependencies in APP_LIBS, and the server runs without auto-reload.
+FROM base-env AS prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+    pip install --cache-dir=/var/cache/pip --target="${APP_LIBS}" /tmp/lib/oasst-shared
+
+
+# Shell form so that ${PORT} is expanded when the container starts.
+CMD uvicorn main:app --host 0.0.0.0 --port "${PORT}"
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1
+
+# Global build arguments; stages re-declare the ones they use.
+ARG APP_USER="text-client"
+ARG APP_RELATIVE_PATH="inference/text-client"
+
+
+# Build stage: installs the client's requirements into /build/lib.
+FROM python:3.10-alpine3.17 AS build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's download cache across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt
+
+
+
+# Runtime base for the text client.
+FROM python:3.10-alpine3.17 AS base-env
+ARG APP_RELATIVE_PATH
+ARG APP_USER
+
+# Filesystem layout of the client inside the image.
+ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}" \
+    APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+
+# Make the packages installed under APP_LIBS importable and their scripts
+# runnable.
+ENV PATH="${PATH}:${APP_LIBS}/bin" \
+    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+# Dedicated unprivileged account to run the client.
+RUN adduser --disabled-password --no-create-home "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+# Dependencies come from the build stage; the entry point from the build context.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+
+
+# Final image: starts the client against the backend given by BACKEND_URL,
+# which is read from the container's environment at start-up.
+FROM base-env AS prod
+
+
+CMD python3 __main__.py --backend-url "${BACKEND_URL}"
@@ -0,0 +1,85 @@
+# syntax=docker/dockerfile:1
+
+# Global build arguments; stages re-declare the ones they use.
+ARG MODULE="inference"
+ARG SERVICE="worker"
+
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+
+# Build stage: installs the worker's requirements into /build/lib.
+FROM python:3.10-alpine3.17 AS build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's download cache across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt
+
+
+
+# Runtime base shared by the dev and prod stages.
+FROM python:3.10-alpine3.17 AS base-env
+ARG MODULE
+ARG SERVICE
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+
+# Filesystem layout of the worker inside the image.
+ENV APP_BASE="/opt/${MODULE}" \
+    APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}" \
+    SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+# Make the packages installed under APP_LIBS importable and their scripts
+# runnable.
+ENV PATH="${PATH}:${APP_LIBS}/bin" \
+    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+# Dedicated unprivileged account to run the worker.
+RUN adduser --disabled-password --no-create-home "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+# Dependencies come from the build stage; the entry point from the build context.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+
+# Shell form so that both URLs are expanded from the environment at start-up.
+# The dev and prod stages build on this stage and do not override the command.
+CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
+
+
+
+# Development image: oasst-shared is installed in editable mode.
+FROM base-env AS dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+# The editable install runs as root; switch back to the app user afterwards.
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+    pip install --cache-dir=/var/cache/pip -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+VOLUME ["${APP_BASE}/lib/oasst-shared"]
+
+
+
+# Production image: oasst-shared is installed as a regular package next to the
+# other dependencies in APP_LIBS.
+FROM base-env AS prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+    pip install --cache-dir=/var/cache/pip --target="${APP_LIBS}" /tmp/lib/oasst-shared
@@ -9,6 +9,33 @@ For more details and information check out
 [this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose)
 that explains it all in detail.

+### Enable Docker's BuildKit Backend
+
+[BuildKit](https://docs.docker.com/build/buildkit/) is Docker's new and improved
+builder backend. In addition to being faster and more efficient, it supports
+many new features, among which is the ability to provide a persistent cache,
+which outlives builds, to compilers and package managers. This is very useful to
+speed up consecutive builds, and is used by some container images of
+OpenAssistant's stack.
+
+The BuildKit backend is
+[used by default by Compose V2](https://www.docker.com/blog/announcing-compose-v2-general-availability/)
+(see above). <br/> However, if you want to build an image with `docker build`
+instead of `docker compose build`, you might need to enable BuildKit.
+
+To do so, just add `DOCKER_BUILDKIT=1` to your environment.
+
+For instance:
+
+```shell
+export DOCKER_BUILDKIT=1
+```
+
+You could also, more conveniently,
+[enable BuildKit by default](https://docs.docker.com/build/buildkit/#:~:text=To%20enable%20docker%20BuildKit%20by%20default),
+or use
+[Docker Buildx](https://docs.docker.com/build/#:~:text=The%20new%20client%20Docker%20Buildx).
+
 ### Pre-commit

 We are using pre-commit to ensure the quality of the code as well as the same
@@ -1,14 +1,64 @@
-# OpenAssitant Inference
+# OpenAssistant Inference

 Preliminary implementation of the inference engine for OpenAssistant.

-## Development Variant 1 (you'll need tmux)
+## Development Variant 1 (docker compose)
+
+The services of the inference stack are prefixed with "inference-" in the
+[unified compose descriptor](../docker-compose.yaml). <br/> Prior to building
+those, please ensure that you have Docker's new
+[BuildKit](https://docs.docker.com/build/buildkit/) backend enabled. See the
+[FAQ](https://projects.laion.ai/Open-Assistant/docs/faq#enable-dockers-buildkit-backend)
+for more info.
+
+To build the services, run:
+
+```shell
+docker compose --profile inference build
+```
+
+Spin up the stack:
+
+```shell
+docker compose --profile inference up -d
+```
+
+Tail the logs:
+
+```shell
+docker compose logs -f    \
+    inference-server      \
+    inference-worker      \
+    inference-text-client \
+    inference-text-generation-server
+```
+
+Attach to the text-client, and start chatting:
+
+```shell
+docker attach open-assistant-inference-text-client-1
+```
+
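+> **Note:** In the last step, `open-assistant-inference-text-client-1` refers to
+> the name of the `inference-text-client` container started when spinning up the
+> stack (step 2). The exact name may differ on your machine; run `docker ps` to
+> look it up.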
+
+> **Note:** The compose file contains the bind mounts enabling you to develop on
+> the modules of the inference stack, and the `oasst-shared` package, without
+> rebuilding.
+
+> **Note:** You can spin up any number of workers by adjusting the number of
+> replicas of the `inference-worker` service to your liking.
+
+> **Note:** Please wait for the `inference-text-generation-server` service to
+> output `{"message":"Connected"}` before starting to chat.
+
+## Development Variant 2 (you'll need tmux)

 Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait
 until the 2nd terminal is ready and says `{"message":"Connected"}` before
 entering input into the last terminal.

-## Development Variant 2 (you'll need multiple terminals)
+## Development Variant 3 (you'll need multiple terminals)

 Run a redis container (or use the one of the general docker compose file):