diff --git a/README.md b/README.md
index 7cac0788..927314ff 100644
--- a/README.md
+++ b/README.md
@@ -51,6 +51,10 @@ docker compose up --build
Then, navigate to `http://localhost:3000` (It may take some time to boot up) and
interact with the website.
+> **Note:** If an issue occurs with the build, please head to the
+> [FAQ](https://projects.laion.ai/Open-Assistant/docs/faq) and check out the
+> entries about Docker.
+
> **Note:** When logging in via email, navigate to `http://localhost:1080` to
> get the magic email login link.
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 908457cd..b9c9c82f 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -131,3 +131,61 @@ services:
ports:
- "3000:3000"
command: bash wait-for-postgres.sh node server.js
+
+  # Inference API server, built from the `dev` stage of Dockerfile.server.
+  # Only started when the "inference" profile is enabled.
+  inference-server:
+    image: oasst-inference-server:dev
+    build:
+      context: .
+      dockerfile: docker/inference/Dockerfile.server
+      target: dev
+    profiles:
+      - inference
+    restart: unless-stopped
+    depends_on:
+      redis:
+        condition: service_healthy
+    environment:
+      PORT: "8000"
+      REDIS_HOST: "redis"
+    volumes:
+      # Bind-mount the sources over the paths used inside the image.
+      - './oasst-shared:/opt/inference/lib/oasst-shared'
+      - './inference/server:/opt/inference/server'
+
+  # Inference worker, built from the `dev` stage of Dockerfile.worker. It is
+  # given the server's websocket URL and the text-generation server's HTTP URL.
+  inference-worker:
+    image: oasst-inference-worker:dev
+    build:
+      context: .
+      dockerfile: docker/inference/Dockerfile.worker
+      target: dev
+    profiles:
+      - inference
+    depends_on:
+      - inference-server
+    deploy:
+      # Number of worker containers to start.
+      replicas: 1
+    environment:
+      BACKEND_URL: "ws://inference-server:8000"
+      INFERENCE_SERVER_URL: "http://inference-text-generation-server"
+    volumes:
+      # Bind-mount the sources over the paths used inside the image.
+      - './oasst-shared:/opt/inference/lib/oasst-shared'
+      - './inference/worker:/opt/inference/worker'
+
+  # Interactive text client for the inference server. A TTY and stdin are kept
+  # open so that the container can be attached to with `docker attach`.
+  inference-text-client:
+    build:
+      dockerfile: docker/inference/Dockerfile.text-client
+      context: .
+    image: oasst-inference-text-client
+    environment:
+      - "BACKEND_URL=http://inference-server:8000"
+    tty: true
+    stdin_open: true
+    volumes:
+      # Mount the client's own sources over its application root
+      # (/opt/inference/text-client, as set up by Dockerfile.text-client);
+      # mounting the worker directory here would leave the client code baked
+      # into the image untouched.
+      - "./inference/text-client:/opt/inference/text-client"
+    restart: unless-stopped
+    depends_on:
+      - inference-server
+    profiles: ["inference"]
+
+  # Text-generation backend, run from a prebuilt image with the model selected
+  # through MODEL_ID. Only started when the "inference" profile is enabled.
+  inference-text-generation-server:
+    image: ykilcher/text-generation-inference
+    profiles:
+      - inference
+    environment:
+      MODEL_ID: "distilgpt2"
diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server
new file mode 100644
index 00000000..0838a21e
--- /dev/null
+++ b/docker/inference/Dockerfile.server
@@ -0,0 +1,90 @@
+# syntax=docker/dockerfile:1
+
+# Build arguments declared before the first FROM. A stage that needs one of
+# them re-declares it with a bare `ARG NAME` to bring the value into scope.
+ARG MODULE="inference"
+ARG SERVICE="server"
+
+# Derived defaults: APP_USER is "<module>-<service>" and APP_RELATIVE_PATH is
+# "<module>/<service>".
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+
+# Build stage: installs the service's Python dependencies into /build/lib so
+# that the runtime stages can copy them without re-running pip.
+# It uses the same interpreter and libc (CPython 3.10 on Alpine/musl) as the
+# `base-env` runtime stage: wheels with compiled extensions that were installed
+# under a different Python version or under glibc would not be importable
+# after being copied there.
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's cache directory across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+    pip install \
+    --cache-dir=/var/cache/pip \
+    --target=lib \
+    -r requirements.txt
+
+
+
+# Shared runtime environment that the `dev` and `prod` stages extend.
+FROM python:3.10-alpine3.17 as base-env
+# Re-declare the global build args so they are visible inside this stage.
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+ARG MODULE
+ARG SERVICE
+
+# Filesystem layout: application code under /opt/<module>/<service>,
+# third-party packages under /var/opt/<module>/<service>/lib, shared project
+# libraries under /opt/<module>/lib.
+ENV APP_BASE="/opt/${MODULE}"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+# Make the packages installed with `pip --target` in the build stage (and
+# their console scripts) discoverable.
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+# Default listening port; the CMD of the dev and prod stages reads it.
+ENV PORT="8000"
+
+
+# Create the account the service runs as (no password, no home directory).
+RUN adduser \
+ --disabled-password \
+ --no-create-home \
+ "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+# Copy the dependencies installed by the build stage, then the entry module.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py .
+
+
+
+# Development image: oasst-shared is installed in editable mode and uvicorn
+# is started with --reload.
+FROM base-env as dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+# Run the pip install as root, then switch back to the service account.
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+# Declare the oasst-shared directory as a volume.
+VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
+
+
+# Shell-form CMD, so ${PORT} is expanded by the shell at container start.
+CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"
+
+
+
+# Production image: oasst-shared is installed as a regular package into
+# APP_LIBS, next to the other dependencies; uvicorn runs without --reload.
+FROM base-env as prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+# NOTE(review): the cache mount is seeded with `from=dev`, which ties this
+# stage to the dev stage -- confirm that this dependency is intended.
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ --target="${APP_LIBS}" \
+ /tmp/lib/oasst-shared
+
+
+# Shell-form CMD, so ${PORT} is expanded by the shell at container start.
+CMD uvicorn main:app --host 0.0.0.0 --port "${PORT}"
diff --git a/docker/inference/Dockerfile.text-client b/docker/inference/Dockerfile.text-client
new file mode 100644
index 00000000..23a54abe
--- /dev/null
+++ b/docker/inference/Dockerfile.text-client
@@ -0,0 +1,50 @@
+# syntax=docker/dockerfile:1
+
+# Build arguments declared before the first FROM. A stage that needs one of
+# them re-declares it with a bare `ARG NAME` to bring the value into scope.
+ARG APP_USER="text-client"
+ARG APP_RELATIVE_PATH="inference/text-client"
+
+
+# Build stage: installs the client's Python dependencies into /build/lib so
+# that the runtime stage can copy them without re-running pip.
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's cache directory across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ --target=lib \
+ -r requirements.txt
+
+
+
+# Runtime environment that the `prod` stage extends.
+FROM python:3.10-alpine3.17 as base-env
+# Re-declare the global build args so they are visible inside this stage.
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+
+# Application code lives under /opt/<relative path>, third-party packages
+# under /var/opt/<relative path>/lib.
+ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+
+# Make the packages installed with `pip --target` in the build stage (and
+# their console scripts) discoverable.
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+# Create the account the client runs as (no password, no home directory).
+RUN adduser \
+ --disabled-password \
+ --no-create-home \
+ "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+# Copy the dependencies installed by the build stage, then the entry module.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+
+
+# Final image: the base environment plus the start command.
+FROM base-env as prod
+
+
+# Shell-form CMD, so ${BACKEND_URL} is expanded by the shell at container
+# start; the variable has to be provided at run time.
+CMD python3 __main__.py --backend-url "${BACKEND_URL}"
diff --git a/docker/inference/Dockerfile.worker b/docker/inference/Dockerfile.worker
new file mode 100644
index 00000000..06f040ab
--- /dev/null
+++ b/docker/inference/Dockerfile.worker
@@ -0,0 +1,85 @@
+# syntax=docker/dockerfile:1
+
+# Build arguments declared before the first FROM. A stage that needs one of
+# them re-declares it with a bare `ARG NAME` to bring the value into scope.
+ARG MODULE="inference"
+ARG SERVICE="worker"
+
+# Derived defaults: APP_USER is "<module>-<service>" and APP_RELATIVE_PATH is
+# "<module>/<service>".
+ARG APP_USER="${MODULE}-${SERVICE}"
+ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}"
+
+
+# Build stage: installs the worker's Python dependencies into /build/lib so
+# that the runtime stages can copy them without re-running pip.
+FROM python:3.10-alpine3.17 as build
+ARG APP_RELATIVE_PATH
+
+WORKDIR /build
+
+COPY ./${APP_RELATIVE_PATH}/requirements.txt .
+
+# The BuildKit cache mount keeps pip's cache directory across builds.
+RUN --mount=type=cache,target=/var/cache/pip \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ --target=lib \
+ -r requirements.txt
+
+
+
+# Shared runtime environment that the `dev` and `prod` stages extend.
+FROM python:3.10-alpine3.17 as base-env
+# Re-declare the global build args so they are visible inside this stage.
+ARG APP_USER
+ARG APP_RELATIVE_PATH
+ARG MODULE
+ARG SERVICE
+
+# Filesystem layout: application code under /opt/<module>/<service>,
+# third-party packages under /var/opt/<module>/<service>/lib, shared project
+# libraries under /opt/<module>/lib.
+ENV APP_BASE="/opt/${MODULE}"
+ENV APP_ROOT="${APP_BASE}/${SERVICE}"
+ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib"
+ENV SHARED_LIBS_BASE="${APP_BASE}/lib"
+
+# Make the packages installed with `pip --target` in the build stage (and
+# their console scripts) discoverable.
+ENV PATH="${PATH}:${APP_LIBS}/bin"
+ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}"
+
+
+# Create the account the worker runs as (no password, no home directory).
+RUN adduser \
+ --disabled-password \
+ --no-create-home \
+ "${APP_USER}"
+
+USER ${APP_USER}
+
+WORKDIR ${APP_ROOT}
+
+
+# Copy the dependencies installed by the build stage, then the entry module.
+COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS}
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py .
+
+
+# Shell-form CMD, so both URLs are expanded by the shell at container start.
+# Defined here, so the dev and prod stages inherit it.
+CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}"
+
+
+
+# Development image: oasst-shared is installed in editable mode.
+FROM base-env as dev
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared ${SHARED_LIBS_BASE}/oasst-shared
+
+# Run the pip install as root, then switch back to the service account.
+USER root
+RUN --mount=type=cache,target=/var/cache/pip,from=build \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ -e "${SHARED_LIBS_BASE}/oasst-shared"
+USER ${APP_USER}
+
+
+# Declare the oasst-shared directory as a volume.
+VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
+
+
+
+# Production image: oasst-shared is installed as a regular package into
+# APP_LIBS, next to the other dependencies.
+FROM base-env as prod
+ARG APP_USER
+
+
+COPY --chown="${APP_USER}:${APP_USER}" ./oasst-shared /tmp/lib/oasst-shared
+# NOTE(review): the cache mount is seeded with `from=dev`, which ties this
+# stage to the dev stage -- confirm that this dependency is intended.
+RUN --mount=type=cache,target=/var/cache/pip,from=dev \
+ pip install \
+ --cache-dir=/var/cache/pip \
+ --target="${APP_LIBS}" \
+ /tmp/lib/oasst-shared
diff --git a/docs/docs/faq/faq.md b/docs/docs/faq/faq.md
index 0db57d30..025ae02b 100644
--- a/docs/docs/faq/faq.md
+++ b/docs/docs/faq/faq.md
@@ -9,6 +9,33 @@ For more details and information check out
[this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose)
that explains it all in detail.
+### Enable Docker's BuildKit Backend
+
+[BuildKit](https://docs.docker.com/build/buildkit/) is Docker's new and improved
+builder backend. In addition to being faster and more efficient, it supports
+many new features, among which is the ability to provide a persistent cache,
+which outlives builds, to compilers and package managers. This is very useful to
+speed up consecutive builds, and is used by some container images of
+OpenAssistant's stack.
+
+The BuildKit backend is used by default by
+[Compose V2](https://www.docker.com/blog/announcing-compose-v2-general-availability/)
+(see above). But if you want to build an image with `docker build` instead
+of `docker compose build`, you might need to enable BuildKit.
+
+To do so, just add `DOCKER_BUILDKIT=1` to your environment.
+
+For instance:
+
+```shell
+export DOCKER_BUILDKIT=1
+```
+
+You could also, more conveniently,
+[enable BuildKit by default](https://docs.docker.com/build/buildkit/#:~:text=To%20enable%20docker%20BuildKit%20by%20default),
+or use
+[Docker Buildx](https://docs.docker.com/build/#:~:text=The%20new%20client%20Docker%20Buildx).
+
### Pre-commit
We are using pre-commit to ensure the quality of the code as well as the same
diff --git a/inference/README.md b/inference/README.md
index 6e1da2c7..0475c876 100644
--- a/inference/README.md
+++ b/inference/README.md
@@ -1,14 +1,64 @@
-# OpenAssitant Inference
+# OpenAssistant Inference
Preliminary implementation of the inference engine for OpenAssistant.
-## Development Variant 1 (you'll need tmux)
+## Development Variant 1 (docker compose)
+
+The services of the inference stack are prefixed with "inference-" in the
+[unified compose descriptor](../docker-compose.yaml). Prior to building
+those, please ensure that you have Docker's new
+[BuildKit](https://docs.docker.com/build/buildkit/) backend enabled. See the
+[FAQ](https://projects.laion.ai/Open-Assistant/docs/faq#enable-dockers-buildkit-backend)
+for more info.
+
+To build the services, run:
+
+```shell
+docker compose --profile inference build
+```
+
+Spin up the stack:
+
+```shell
+docker compose --profile inference up -d
+```
+
+Tail the logs:
+
+```shell
+docker compose logs -f \
+ inference-server \
+ inference-worker \
+ inference-text-client \
+ inference-text-generation-server
+```
+
+Attach to the text-client, and start chatting:
+
+```shell
+docker attach open-assistant-inference-text-client-1
+```
+
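+> **Note:** In the last step, `open-assistant-inference-text-client-1` refers to
+> the name of the `inference-text-client` container started in step 2. The
+> `open-assistant` prefix is the Compose project name (by default, the name of
+> the directory containing the compose file), so adjust it if yours differs.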
+
+> **Note:** The compose file contains the bind mounts enabling you to develop on
+> the modules of the inference stack, and the `oasst-shared` package, without
+> rebuilding.
+
+> **Note:** You can spin up any number of workers by adjusting the number of
+> replicas of the `inference-worker` service to your liking.
+
+> **Note:** Please wait for the `inference-text-generation-server` service to
+> output `{"message":"Connected"}` before starting to chat.
+
+## Development Variant 2 (you'll need tmux)
Run `./full-dev-setup.sh` to start the full development setup. Make sure to wait
until the 2nd terminal is ready and says `{"message":"Connected"}` before
entering input into the last terminal.
-## Development Variant 2 (you'll need multiple terminals)
+## Development Variant 3 (you'll need multiple terminals)
Run a redis container (or use the one of the general docker compose file):