Merge pull request #1024 from occupytheweb/913_inference_server_docker_setup

inference: Define container images and compose descriptor for the inference stack
This commit is contained in:
Alan Jean
2023-01-31 20:33:45 +04:00
committed by GitHub
7 changed files with 367 additions and 3 deletions
+4
View File
@@ -51,6 +51,10 @@ docker compose up --build
Then, navigate to `http://localhost:3000` (It may take some time to boot up) and
interact with the website.
> **Note:** If an issue occurs with the build, please head to the
> [FAQ](https://projects.laion.ai/Open-Assistant/docs/faq) and check out the
> entries about Docker.
> **Note:** When logging in via email, navigate to `http://localhost:1080` to
> get the magic email login link.
+58
View File
@@ -131,3 +131,61 @@ services:
ports:
- "3000:3000"
command: bash wait-for-postgres.sh node server.js
# Inference backend, built from the `dev` target of Dockerfile.server.
# Only started when the `inference` profile is enabled.
inference-server:
  image: 'oasst-inference-server:dev'
  build:
    context: .
    dockerfile: docker/inference/Dockerfile.server
    target: dev
  environment:
    PORT: '8000'
    REDIS_HOST: 'redis'
  volumes:
    # Bind mounts over the paths the image uses, so local edits to the shared
    # package and the server sources are visible without rebuilding.
    - './oasst-shared:/opt/inference/lib/oasst-shared'
    - './inference/server:/opt/inference/server'
  depends_on:
    # Wait until the redis service reports healthy before starting.
    redis:
      condition: service_healthy
  restart: unless-stopped
  profiles:
    - inference
# Inference worker, built from the `dev` target of Dockerfile.worker.
# Only started when the `inference` profile is enabled.
inference-worker:
  image: 'oasst-inference-worker:dev'
  build:
    context: .
    dockerfile: docker/inference/Dockerfile.worker
    target: dev
  environment:
    # Both values are passed to the worker's entry point as command-line
    # options by the image's CMD.
    BACKEND_URL: 'ws://inference-server:8000'
    INFERENCE_SERVER_URL: 'http://inference-text-generation-server'
  volumes:
    # Bind mounts so local edits are visible without rebuilding the image.
    - './oasst-shared:/opt/inference/lib/oasst-shared'
    - './inference/worker:/opt/inference/worker'
  depends_on:
    # Long form of the plain list entry: only waits for the container to start.
    inference-server:
      condition: service_started
  deploy:
    # Raise this number to run several workers side by side.
    replicas: 1
  profiles:
    - inference
# Interactive text client for the inference stack, built from
# Dockerfile.text-client (no explicit target, so the last stage is used).
# Only started when the `inference` profile is enabled.
inference-text-client:
  build:
    dockerfile: docker/inference/Dockerfile.text-client
    context: .
  image: oasst-inference-text-client
  environment:
    # Passed to the client's entry point via `--backend-url` by the image's CMD.
    - "BACKEND_URL=http://inference-server:8000"
  # Keep a terminal and stdin attached so `docker attach` can be used to chat.
  tty: true
  stdin_open: true
  volumes:
    # Mount the text-client sources over the image's application directory
    # (/opt/inference/text-client) so local edits are used without rebuilding.
    # The previous entry mounted the *worker* sources at /opt/inference/worker,
    # a path this image never reads.
    - "./inference/text-client:/opt/inference/text-client"
  restart: unless-stopped
  depends_on:
    - inference-server
  profiles: ["inference"]
# Text-generation backend addressed by the worker's INFERENCE_SERVER_URL.
# Uses a prebuilt image; only started when the `inference` profile is enabled.
inference-text-generation-server:
  image: 'ykilcher/text-generation-inference'
  environment:
    # Model the server is asked to load.
    MODEL_ID: 'distilgpt2'
  profiles:
    - inference
+90
View File
@@ -0,0 +1,90 @@
# syntax=docker/dockerfile:1

# Global build arguments. They are declared before the first FROM, so every
# stage that reads one re-declares it with a bare `ARG NAME`.
# MODULE and SERVICE identify the sub-project; the two values below are
# composed from them.
ARG MODULE="inference"
ARG SERVICE="server"
# Name of the unprivileged account created in the runtime stage.
ARG APP_USER="${MODULE}-${SERVICE}"
# Location of the service's sources, relative to the build context.
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
# Dependency stage: installs the service's requirements into /build/lib so the
# runtime stage can copy them in with a single COPY --from=build.
#
# The base image deliberately matches the runtime stage (same Python minor
# version, same libc). Packages are installed here with `--target` and copied
# verbatim afterwards, so wheels containing compiled extensions that were
# selected for a glibc image or a different interpreter version would fail to
# import on python:3.10-alpine3.17.
# NOTE(review): if a requirement ships no musllinux wheel, the build tools it
# needs (e.g. via `apk add`) have to be installed in this stage.
FROM python:3.10-alpine3.17 AS build
ARG APP_RELATIVE_PATH
WORKDIR /build
COPY ./${APP_RELATIVE_PATH}/requirements.txt .
# The BuildKit cache mount keeps pip's download cache between builds without
# storing it in an image layer.
RUN --mount=type=cache,target=/var/cache/pip \
    pip install \
      --cache-dir=/var/cache/pip \
      --target=lib \
      -r requirements.txt
# Runtime base shared by the `dev` and `prod` targets: defines the directory
# layout, creates the unprivileged user and copies in the dependencies and the
# application entry point.
FROM python:3.10-alpine3.17 AS base-env

# Re-declare the global build arguments this stage reads.
ARG MODULE
ARG SERVICE
ARG APP_RELATIVE_PATH
ARG APP_USER

# Directory layout. Each dependent value is set in its own ENV instruction
# because a variable cannot be referenced in the instruction that defines it.
ENV APP_BASE="/opt/${MODULE}"
ENV APP_ROOT="${APP_BASE}/${SERVICE}"
ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"

# Expose the copied packages (and their console scripts) to the interpreter,
# and set the default port the server listens on.
ENV PATH="${PATH}:${APP_LIBS}/bin" \
    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}" \
    PORT="8000"

# Account without password and without home directory; everything below runs
# as this user.
RUN adduser --disabled-password --no-create-home "${APP_USER}"
USER ${APP_USER}
WORKDIR ${APP_ROOT}

# Third-party dependencies resolved in the `build` stage, then the entry point.
COPY --from=build --chown="${APP_USER}:${APP_USER}" /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .
# Development target: installs `oasst-shared` in editable mode and starts
# uvicorn with auto-reload.
FROM base-env AS dev
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared

# The editable install is performed as root; the unprivileged user is restored
# right after it.
USER root
RUN --mount=type=cache,from=build,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip -e "${SHARED_LIBS_BASE}/oasst-shared"
USER ${APP_USER}

# Declared as a volume so the package sources can be mounted from outside.
VOLUME [ "${APP_BASE}/lib/oasst-shared" ]

# Shell form so that ${PORT} is expanded when the container starts.
CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"
# Production target: installs `oasst-shared` as a regular (non-editable)
# package next to the other dependencies in APP_LIBS and starts uvicorn without
# auto-reload. Runs as the unprivileged user inherited from `base-env`.
FROM base-env AS prod
ARG APP_USER
COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
# The cache mount is based on the `build` stage, as in the `dev` target.
# Referencing `dev` here made every production build also build the complete
# development stage, although nothing from it ends up in this image.
RUN --mount=type=cache,target=/var/cache/pip,from=build \
    pip install \
      --cache-dir=/var/cache/pip \
      --target="${APP_LIBS}" \
      /tmp/lib/oasst-shared
# Shell form so that ${PORT} is expanded when the container starts.
CMD uvicorn main:app --host 0.0.0.0 --port "${PORT}"
+50
View File
@@ -0,0 +1,50 @@
# syntax=docker/dockerfile:1

# Global build arguments. They are declared before the first FROM, so every
# stage that reads one re-declares it with a bare `ARG NAME`.
# Name of the unprivileged account created in the runtime stage.
ARG APP_USER="text-client"
# Location of the client's sources, relative to the build context.
ARG APP_RELATIVE_PATH="inference/text-client"
# Dependency stage: installs the client's requirements into /build/lib, from
# where the runtime stage copies them.
FROM python:3.10-alpine3.17 AS build
ARG APP_RELATIVE_PATH

WORKDIR /build
COPY ./${APP_RELATIVE_PATH}/requirements.txt .

# The BuildKit cache mount keeps pip's download cache between builds without
# storing it in an image layer.
RUN --mount=type=cache,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt
# Runtime base: defines the directory layout, creates the unprivileged user and
# copies in the dependencies and the client's entry point.
FROM python:3.10-alpine3.17 AS base-env

# Re-declare the global build arguments this stage reads.
ARG APP_RELATIVE_PATH
ARG APP_USER

# Application directory and the directory holding the copied dependencies.
ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}"
ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"

# Expose the copied packages (and their console scripts) to the interpreter.
ENV PATH="${PATH}:${APP_LIBS}/bin" \
    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"

# Account without password and without home directory; everything below runs
# as this user.
RUN adduser --disabled-password --no-create-home "${APP_USER}"
USER ${APP_USER}
WORKDIR ${APP_ROOT}

# Third-party dependencies resolved in the `build` stage, then the entry point.
COPY --from=build --chown="${APP_USER}:${APP_USER}" /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
# Final target: starts the client. BACKEND_URL is not set in this file; it is
# read from the container's environment at start-up, which is why the shell
# form of CMD is used.
FROM base-env AS prod
CMD python3 __main__.py --backend-url "${BACKEND_URL}"
+85
View File
@@ -0,0 +1,85 @@
# syntax=docker/dockerfile:1

# Global build arguments. They are declared before the first FROM, so every
# stage that reads one re-declares it with a bare `ARG NAME`.
# MODULE and SERVICE identify the sub-project; the two values below are
# composed from them.
ARG MODULE="inference"
ARG SERVICE="worker"
# Name of the unprivileged account created in the runtime stage.
ARG APP_USER="${MODULE}-${SERVICE}"
# Location of the service's sources, relative to the build context.
ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
# Dependency stage: installs the worker's requirements into /build/lib, from
# where the runtime stage copies them.
FROM python:3.10-alpine3.17 AS build
ARG APP_RELATIVE_PATH

WORKDIR /build
COPY ./${APP_RELATIVE_PATH}/requirements.txt .

# The BuildKit cache mount keeps pip's download cache between builds without
# storing it in an image layer.
RUN --mount=type=cache,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip --target=lib -r requirements.txt
# Runtime base shared by the `dev` and `prod` targets: defines the directory
# layout, creates the unprivileged user, copies in the dependencies and the
# entry point, and sets the start command both targets inherit.
FROM python:3.10-alpine3.17 AS base-env

# Re-declare the global build arguments this stage reads.
ARG MODULE
ARG SERVICE
ARG APP_RELATIVE_PATH
ARG APP_USER

# Directory layout. Each dependent value is set in its own ENV instruction
# because a variable cannot be referenced in the instruction that defines it.
ENV APP_BASE="/opt/${MODULE}"
ENV APP_ROOT="${APP_BASE}/${SERVICE}"
ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"

# Expose the copied packages (and their console scripts) to the interpreter.
ENV PATH="${PATH}:${APP_LIBS}/bin" \
    PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"

# Account without password and without home directory; everything below runs
# as this user.
RUN adduser --disabled-password --no-create-home "${APP_USER}"
USER ${APP_USER}
WORKDIR ${APP_ROOT}

# Third-party dependencies resolved in the `build` stage, then the entry point.
COPY --from=build --chown="${APP_USER}:${APP_USER}" /build/lib ${APP_LIBS}
COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .

# BACKEND_URL and INFERENCE_SERVER_URL are not set in this file; they are read
# from the container's environment at start-up (shell form of CMD).
CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
# Development target: installs `oasst-shared` in editable mode. The start
# command is inherited from `base-env`.
FROM base-env AS dev
ARG APP_USER

COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared

# The editable install is performed as root; the unprivileged user is restored
# right after it.
USER root
RUN --mount=type=cache,from=build,target=/var/cache/pip \
    pip install --cache-dir=/var/cache/pip -e "${SHARED_LIBS_BASE}/oasst-shared"
USER ${APP_USER}

# Declared as a volume so the package sources can be mounted from outside.
VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
# Production target: installs `oasst-shared` as a regular (non-editable)
# package next to the other dependencies in APP_LIBS. The start command and the
# unprivileged user are inherited from `base-env`.
FROM base-env AS prod
ARG APP_USER
COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
# The cache mount is based on the `build` stage, as in the `dev` target.
# Referencing `dev` here made every production build also build the complete
# development stage, although nothing from it ends up in this image.
RUN --mount=type=cache,target=/var/cache/pip,from=build \
    pip install \
      --cache-dir=/var/cache/pip \
      --target="${APP_LIBS}" \
      /tmp/lib/oasst-shared
+27
View File
@@ -9,6 +9,33 @@ For more details and information check out
[this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose)
that explains it all in detail.
### Enable Docker's BuildKit Backend
[BuildKit](https://docs.docker.com/build/buildkit/) is Docker's new and improved
builder backend. In addition to being faster and more efficient, it supports
many new features, among which is the ability to provide a persistent cache,
which outlives builds, to compilers and package managers. This is very useful to
speed up consecutive builds, and is used by some container images of
OpenAssistant's stack.
The BuildKit backend is used
[by default in Compose V2](https://www.docker.com/blog/announcing-compose-v2-general-availability/)
(see above). <br/> But if you want to build an image with `docker build` instead
of `docker compose build`, you might need to enable BuildKit.
To do so, just add `DOCKER_BUILDKIT=1` to your environment.
For instance:
```shell
export DOCKER_BUILDKIT=1
```
You could also, more conveniently,
[enable BuildKit by default](https://docs.docker.com/build/buildkit/#:~:text=To%20enable%20docker%20BuildKit%20by%20default),
or use
[Docker Buildx](https://docs.docker.com/build/#:~:text=The%20new%20client%20Docker%20Buildx).
### Pre-commit
We are using pre-commit to ensure the quality of the code as well as the same
+53 -3
View File
@@ -1,14 +1,64 @@
# OpenAssitant Inference
# OpenAssistant Inference
Preliminary implementation of the inference engine for OpenAssistant.
## Development Variant 1 (you'll need tmux)
## Development Variant 1 (docker compose)
The services of the inference stack are prefixed with "inference-" in the
[unified compose descriptor](../docker-compose.yaml). <br/> Prior to building
those, please ensure that you have Docker's new
[BuildKit](https://docs.docker.com/build/buildkit/) backend enabled. See the
[FAQ](https://projects.laion.ai/Open-Assistant/docs/faq#enable-dockers-buildkit-backend)
for more info.
To build the services, run:
```shell
docker compose --profile inference build
```
Spin up the stack:
```shell
docker compose --profile inference up -d
```
Tail the logs:
```shell
docker compose logs -f \
inference-server \
inference-worker \
inference-text-client \
inference-text-generation-server
```
Attach to the text-client, and start chatting:
```shell
docker attach open-assistant-inference-text-client-1
```
> **Note:** In the last step, `open-assistant-inference-text-client-1` refers to
> the name of the `text-client` container started in step 2.
> **Note:** The compose file contains the bind mounts enabling you to develop on
> the modules of the inference stack, and the `oasst-shared` package, without
> rebuilding.
> **Note:** You can spin up any number of workers by adjusting the number of
> replicas of the `inference-worker` service to your liking.
> **Note:** Please wait for the `inference-text-generation-server` service to
> output `{"message":"Connected"}` before starting to chat.
## Development Variant 2 (you'll need tmux)
Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait
until the 2nd terminal is ready and says `{"message":"Connected"}` before
entering input into the last terminal.
## Development Variant 2 (you'll need multiple terminals)
## Development Variant 3 (you'll need multiple terminals)
Run a Redis container (or use the one from the general Docker Compose file):