diff --git a/.github/workflows/build-inference.yaml b/.github/workflows/build-inference.yaml new file mode 100644 index 00000000..deb70655 --- /dev/null +++ b/.github/workflows/build-inference.yaml @@ -0,0 +1,25 @@ +name: Build Inference Images + +on: + push: + branches: + - main + paths: + - docker/inference/** + - inference/** + +jobs: + build-inference-server: + uses: ./.github/workflows/docker-build.yaml + with: + image-name: oasst-inference-server + context: . + dockerfile: docker/inference/Dockerfile.server + build-args: "" + build-inference-worker-full: + uses: ./.github/workflows/docker-build.yaml + with: + image-name: oasst-inference-worker-full + context: . + dockerfile: docker/inference/Dockerfile.worker-full + build-args: "" \ No newline at end of file diff --git a/.github/workflows/deploy-to-node.yaml b/.github/workflows/deploy-to-node.yaml index 522a7da7..af695bdd 100644 --- a/.github/workflows/deploy-to-node.yaml +++ b/.github/workflows/deploy-to-node.yaml @@ -26,6 +26,7 @@ jobs: environment: ${{ inputs.stack-name }} env: WEB_ADMIN_USERS: ${{ secrets.DEV_WEB_ADMIN_USERS }} + WEB_MODERATOR_USERS: ${{ secrets.DEV_WEB_MODERATOR_USERS }} WEB_DISCORD_CLIENT_ID: ${{ secrets.DEV_WEB_DISCORD_CLIENT_ID }} WEB_DISCORD_CLIENT_SECRET: ${{ secrets.DEV_WEB_DISCORD_CLIENT_SECRET }} WEB_EMAIL_SERVER_HOST: ${{ secrets.DEV_WEB_EMAIL_SERVER_HOST }} diff --git a/CODEOWNERS b/CODEOWNERS index 8929c25b..3f581f5c 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,5 +1,5 @@ * @yk @andreaskoepf -/website/ @fozziethebeat @k-nearest-neighbor @AbdBarho +/website/ @fozziethebeat @k-nearest-neighbor @AbdBarho @notmd /model/ @theblackcat102 @sanagno /copilot/ @fozziethebeat @andreaskoepf @yk /docs/ @andrewm4894 @andreaskoepf @yk diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index 2111e1fc..3cb15a62 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -82,6 +82,7 @@ interval: 2s timeout: 2s retries: 10 + shm_size: 1G loop: - name: 
backend - name: web @@ -169,6 +170,8 @@ network_mode: "oasst-{{ stack_name }}" env: ADMIN_USERS: "{{ lookup('ansible.builtin.env', 'WEB_ADMIN_USERS') }}" + MODERATOR_USERS: + "{{ lookup('ansible.builtin.env', 'WEB_MODERATOR_USERS') }}" DATABASE_URL: "postgres://postgres:{{ postgres_password }}@oasst-{{ stack_name }}-postgres-web/postgres" diff --git a/backend/main.py b/backend/main.py index 807d52ce..de173f9d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -27,6 +27,7 @@ from oasst_backend.utils.database_utils import CommitMode, managed_tx_function from oasst_shared.exceptions import OasstError, OasstErrorCode from oasst_shared.schemas import protocol as protocol_schema from oasst_shared.utils import utcnow +from prometheus_fastapi_instrumentator import Instrumentator from pydantic import BaseModel from sqlmodel import Session, select from starlette.middleware.cors import CORSMiddleware @@ -100,6 +101,13 @@ if settings.OFFICIAL_WEB_API_KEY: ) +if settings.ENABLE_PROM_METRICS: + + @app.on_event("startup") + async def enable_prom_metrics(): + Instrumentator().instrument(app).expose(app) + + if settings.RATE_LIMIT: @app.on_event("startup") diff --git a/backend/oasst_backend/config.py b/backend/oasst_backend/config.py index 36de7d9b..43d08097 100644 --- a/backend/oasst_backend/config.py +++ b/backend/oasst_backend/config.py @@ -197,6 +197,8 @@ class Settings(BaseSettings): ROOT_TOKENS: List[str] = ["1234"] # supply a string that can be parsed to a json list + ENABLE_PROM_METRICS: bool = True # enable prometheus metrics at /metrics + @validator("DATABASE_URI", pre=True) def assemble_db_connection(cls, v: Optional[str], values: Dict[str, Any]) -> Any: if isinstance(v, str): diff --git a/backend/oasst_backend/prompt_repository.py b/backend/oasst_backend/prompt_repository.py index f2119e3b..eb244d3b 100644 --- a/backend/oasst_backend/prompt_repository.py +++ b/backend/oasst_backend/prompt_repository.py @@ -947,6 +947,9 @@ class PromptRepository: if deleted is not 
None: qry = qry.filter(Message.deleted == deleted) + if lang is not None: + qry = qry.filter(Message.lang == lang) + if desc: qry = qry.order_by(Message.created_date.desc(), Message.id.desc()) else: @@ -955,9 +958,6 @@ class PromptRepository: if limit is not None: qry = qry.limit(limit) - if lang is not None: - qry = qry.filter(Message.lang == lang) - return self._add_user_emojis_all(qry) def update_children_counts(self, message_tree_id: UUID): diff --git a/backend/requirements.txt b/backend/requirements.txt index 4a0008bb..e0f8c725 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -6,6 +6,7 @@ fastapi-limiter==0.1.5 fastapi-utils==0.2.1 loguru==0.6.0 numpy==1.22.4 +prometheus-fastapi-instrumentator==5.9.1 psycopg2-binary==2.9.5 pydantic==1.9.1 pydantic[email]==1.9.1 diff --git a/docker-compose.yaml b/docker-compose.yaml index 3cfb4c21..52085f62 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -136,6 +136,23 @@ services: - "3000:3000" command: bash wait-for-postgres.sh node server.js + # This DB is for Inference + inference-db: + image: postgres + restart: always + ports: + - 5434:5432 + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: oasst_inference + healthcheck: + test: ["CMD", "pg_isready", "-U", "postgres"] + interval: 2s + timeout: 2s + retries: 10 + profiles: ["inference"] + inference-server: build: dockerfile: docker/inference/Dockerfile.server @@ -145,13 +162,25 @@ services: environment: - "PORT=8000" - "REDIS_HOST=redis" + - POSTGRES_HOST=inference-db + - POSTGRES_DB=oasst_inference volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/server:/opt/inference/server" restart: unless-stopped + ports: + - "8000:8000" depends_on: redis: condition: service_healthy + inference-db: + condition: service_healthy + profiles: ["inference"] + + inference-text-generation-server: + image: ghcr.io/huggingface/text-generation-inference + environment: + - "MODEL_ID=distilgpt2" 
profiles: ["inference"] inference-worker: @@ -167,29 +196,7 @@ services: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" depends_on: - - inference-server + - inference-text-generation-server deploy: replicas: 1 profiles: ["inference"] - - inference-text-client: - build: - dockerfile: docker/inference/Dockerfile.text-client - context: . - image: oasst-inference-text-client - environment: - - "BACKEND_URL=http://inference-server:8000" - tty: true - stdin_open: true - volumes: - - "./inference/worker:/opt/inference/worker" - restart: unless-stopped - depends_on: - - inference-server - profiles: ["inference"] - - inference-text-generation-server: - image: ghcr.io/huggingface/text-generation-inference - environment: - - "MODEL_ID=distilgpt2" - profiles: ["inference"] diff --git a/docker/Dockerfile.backend b/docker/Dockerfile.backend index 3401463c..df9f1858 100644 --- a/docker/Dockerfile.backend +++ b/docker/Dockerfile.backend @@ -14,5 +14,6 @@ COPY ./backend/alembic /app/alembic COPY ./backend/alembic.ini /app/alembic.ini COPY ./backend/main.py /app/main.py COPY ./backend/import.py /app/import.py +COPY ./backend/export.py /app/export.py COPY ./backend/oasst_backend /app/oasst_backend COPY ./backend/test_data /app/test_data diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server index 0838a21e..f5823a9a 100644 --- a/docker/inference/Dockerfile.server +++ b/docker/inference/Dockerfile.server @@ -7,7 +7,7 @@ ARG APP_USER="${MODULE}-${SERVICE}" ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}" -FROM python:3-slim as build +FROM python:3.10-slim as build ARG APP_RELATIVE_PATH WORKDIR /build @@ -22,7 +22,7 @@ RUN --mount=type=cache,target=/var/cache/pip \ -FROM python:3.10-alpine3.17 as base-env +FROM python:3.10-slim as base-env ARG APP_USER ARG APP_RELATIVE_PATH ARG MODULE @@ -50,6 +50,9 @@ WORKDIR ${APP_ROOT} COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS} +COPY 
--chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic alembic +COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/alembic.ini . +COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/oasst_inference_server oasst_inference_server COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/main.py . diff --git a/docker/inference/Dockerfile.text-client b/docker/inference/Dockerfile.text-client deleted file mode 100644 index 23a54abe..00000000 --- a/docker/inference/Dockerfile.text-client +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1 - -ARG APP_USER="text-client" -ARG APP_RELATIVE_PATH="inference/text-client" - - -FROM python:3.10-alpine3.17 as build -ARG APP_RELATIVE_PATH - -WORKDIR /build - -COPY ./${APP_RELATIVE_PATH}/requirements.txt . - -RUN --mount=type=cache,target=/var/cache/pip \ - pip install \ - --cache-dir=/var/cache/pip \ - --target=lib \ - -r requirements.txt - - - -FROM python:3.10-alpine3.17 as base-env -ARG APP_USER -ARG APP_RELATIVE_PATH - -ENV APP_ROOT="/opt/${APP_RELATIVE_PATH}" -ENV APP_LIBS="/var/opt/${APP_RELATIVE_PATH}/lib" - -ENV PATH="${PATH}:${APP_LIBS}/bin" -ENV PYTHONPATH="${PYTHONPATH}:${APP_LIBS}" - - -RUN adduser \ - --disabled-password \ - --no-create-home \ - "${APP_USER}" - -USER ${APP_USER} - -WORKDIR ${APP_ROOT} - -COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS} -COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py . - - - -FROM base-env as prod - - -CMD python3 __main__.py --backend-url "${BACKEND_URL}" diff --git a/docker/inference/Dockerfile.worker b/docker/inference/Dockerfile.worker index 06f040ab..64e8655a 100644 --- a/docker/inference/Dockerfile.worker +++ b/docker/inference/Dockerfile.worker @@ -48,7 +48,7 @@ WORKDIR ${APP_ROOT} COPY --chown="${APP_USER}:${APP_USER}" --from=build /build/lib ${APP_LIBS} -COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/__main__.py . 
+COPY --chown="${APP_USER}:${APP_USER}" ./${APP_RELATIVE_PATH}/*.py . CMD python3 __main__.py --backend-url "${BACKEND_URL}" --inference-server-url "${INFERENCE_SERVER_URL}" diff --git a/docker/inference/Dockerfile.worker-full b/docker/inference/Dockerfile.worker-full new file mode 100644 index 00000000..fa94f62e --- /dev/null +++ b/docker/inference/Dockerfile.worker-full @@ -0,0 +1,22 @@ +FROM ghcr.io/huggingface/text-generation-inference + +ARG MODULE="inference" +ARG SERVICE="worker" + +ARG APP_RELATIVE_PATH="${MODULE}/${SERVICE}" + +WORKDIR /worker +COPY ./oasst-shared /tmp/oasst-shared +RUN conda create -n worker python=3.10 -y +RUN /opt/miniconda/envs/worker/bin/pip install /tmp/oasst-shared + +COPY ./${APP_RELATIVE_PATH}/requirements.txt . +RUN /opt/miniconda/envs/worker/bin/pip install -r requirements.txt + +COPY ./${APP_RELATIVE_PATH}/*.py . +COPY ./${APP_RELATIVE_PATH}/worker_full_main.sh /entrypoint.sh + +ENV MODEL_ID="distilgpt2" +ENV INFERENCE_SERVER_URL="http://localhost:80" + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/inference/full-dev-setup.sh b/inference/full-dev-setup.sh index 5ef754d2..c5afb412 100755 --- a/inference/full-dev-setup.sh +++ b/inference/full-dev-setup.sh @@ -3,9 +3,11 @@ # Creates a tmux window with splits for the individual services tmux new-session -d -s "inference-dev-setup" -tmux send-keys "docker run --rm -it -p 6379:6379 redis" C-m +tmux send-keys "docker run --rm -it -p 5432:5432 -e POSTGRES_PASSWORD=postgres --name postgres postgres" C-m tmux split-window -h -tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 ghcr.io/huggingface/text-generation-inference" C-m +tmux send-keys "docker run --rm -it -p 6379:6379 --name redis redis" C-m +tmux split-window -h +tmux send-keys "docker run --rm -it -p 8001:80 -e MODEL_ID=distilgpt2 -v $HOME/.cache/huggingface:/root/.cache/huggingface --name text-generation-inference ghcr.io/huggingface/text-generation-inference" C-m tmux split-window -h tmux send-keys "cd server" 
C-m tmux send-keys "uvicorn main:app --reload" C-m diff --git a/inference/server/alembic.ini b/inference/server/alembic.ini new file mode 100644 index 00000000..44829313 --- /dev/null +++ b/inference/server/alembic.ini @@ -0,0 +1,105 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = %(here)s/alembic + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python-dateutil library that can be +# installed by adding `alembic[tz]` to the pip requirements +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path. +# The path separator used here should be the separator specified by "version_path_separator" below. 
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions + +# version path separator; As mentioned above, this is the character used to split +# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep. +# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas. +# Valid values for version_path_separator are: +# +# version_path_separator = : +# version_path_separator = ; +# version_path_separator = space +version_path_separator = os # Use os.pathsep. Default configuration used for new projects. + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# sqlalchemy.url = postgresql://:@/ +sqlalchemy.url = postgresql://postgres:postgres@localhost:5432/postgres + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +hooks = black +black.type = console_scripts +black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/inference/server/alembic/README b/inference/server/alembic/README new file mode 100644 index 00000000..2500aa1b --- /dev/null +++ b/inference/server/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. 
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.*`` options of the active alembic
    config section (using ``NullPool``), opens a connection and runs the
    migrations inside a single transaction.

    Before migrating, the ``alembic_version`` table is created if missing and
    then locked in ``ACCESS EXCLUSIVE`` mode for the rest of the transaction.
    NOTE(review): presumably this serializes concurrent upgrade attempts from
    several server instances starting at once -- confirm against deployment.
    """
    # Local import keeps this block self-contained; the module only imports
    # ``engine_from_config`` and ``pool`` from sqlalchemy at the top.
    from sqlalchemy import text

    connectable = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            # The lock below needs the table to exist, so create it up front.
            context.get_context()._ensure_version_table()
            # Raw strings passed to Connection.execute() are deprecated in
            # SQLAlchemy 1.4 and rejected in 2.0; text() works on both.
            connection.execute(text("LOCK TABLE alembic_version IN ACCESS EXCLUSIVE MODE"))
            context.run_migrations()
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/inference/server/alembic/versions/.gitinclude b/inference/server/alembic/versions/.gitinclude new file mode 100644 index 00000000..e69de29b diff --git a/inference/server/alembic/versions/2023_02_10_0221-3a4cd8777eb2_initial_commit.py b/inference/server/alembic/versions/2023_02_10_0221-3a4cd8777eb2_initial_commit.py new file mode 100644 index 00000000..3fa7cd73 --- /dev/null +++ b/inference/server/alembic/versions/2023_02_10_0221-3a4cd8777eb2_initial_commit.py @@ -0,0 +1,36 @@ +"""initial commit + +Revision ID: 3a4cd8777eb2 +Revises: +Create Date: 2023-02-10 02:21:27.086772 + +""" +import sqlalchemy as sa +import sqlmodel +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "3a4cd8777eb2" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "chat", + sa.Column("conversation", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("pending_message_request", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("message_request_state", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column("id", sqlmodel.sql.sqltypes.AutoString(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
def create_session():
    """Yield a ``sqlmodel.Session`` bound to ``db_engine``.

    Written as a generator so it can be used as a FastAPI dependency; the
    session is closed once the consumer is done with it, whether or not an
    error was raised in between.
    """
    db_session = sqlmodel.Session(db_engine)
    try:
        yield db_session
    finally:
        # Same cleanup the session's context-manager exit performs.
        db_session.close()
# Only register the migration hook when the settings ask for it.
if settings.update_alembic:

    @app.on_event("startup")
    def alembic_upgrade() -> None:
        """Upgrade the database schema to alembic ``head`` on application startup.

        Loads ``alembic.ini`` from the directory of this file, overrides its
        ``sqlalchemy.url`` with ``settings.database_uri`` and runs the upgrade.
        On failure the attempt is repeated with an exponentially growing delay;
        once ``settings.alembic_retries`` failures have been counted the last
        exception is re-raised.
        """
        logger.info("Attempting to upgrade alembic on startup")
        retry = 0
        while True:
            try:
                alembic_ini_path = Path(__file__).parent / "alembic.ini"
                alembic_cfg = alembic.config.Config(str(alembic_ini_path))
                # The URL from the ini file is replaced by the one from settings.
                alembic_cfg.set_main_option("sqlalchemy.url", settings.database_uri)
                alembic.command.upgrade(alembic_cfg, "head")
                logger.info("Successfully upgraded alembic on startup")
                break
            except Exception:
                logger.exception("Alembic upgrade failed on startup")
                retry += 1
                if retry >= settings.alembic_retries:
                    # Out of attempts: propagate the failure to the caller.
                    raise

                # retry was already incremented, so the first wait is twice
                # the base timeout and it doubles on every further failure.
                timeout = settings.alembic_retry_timeout * 2**retry
                logger.warning(f"Retrying alembic upgrade in {timeout} seconds")
                # Blocking sleep: this handler is a plain (non-async) function.
                time.sleep(timeout)
@app.get("/chat/{id}")
async def get_chat(id: str, chat_repository: ChatRepository = Depends(create_chat_repository)) -> interface.ChatEntry:
    """Return the current state of the chat identified by ``id``.

    The lookup is delegated to the injected ``ChatRepository``.
    """
    return chat_repository.get_chat_entry_by_id(id)
[] try: while True: if await fastapi_request.is_disconnected(): logger.warning("Client disconnected") - break + return - item = await redisClient.blpop(chat.id, 1) + item = await redisClient.blpop(id, 1) if item is None: continue @@ -135,47 +142,44 @@ async def create_message(id: str, message_request: MessageRequest, fastapi_reque yield { "retry": settings.sse_retry_timeout, - "data": TokenResponseEvent(token=response_packet.token).json(), + "data": interface.TokenResponseEvent(token=response_packet.token).json(), } - logger.info(f"Finished streaming {chat.id} {len(result_data)=}") + logger.info(f"Finished streaming {id} {len(result_data)=}") except Exception: - logger.exception(f"Error streaming {chat.id}") + logger.exception(f"Error streaming {id}") + raise - chat.conversation.messages.append( - protocol.ConversationMessage( - text=response_packet.generated_text.text, - is_assistant=True, - ) - ) - chat.pending_message_request = None + try: + with contextlib.contextmanager(create_session)() as session: + chat_repository = create_chat_repository(session) + chat_repository.add_assistant_message(id=id, text=response_packet.generated_text.text) + except Exception: + logger.exception("Error adding assistant message") - return EventSourceResponse(event_generator()) + return EventSourceResponse(event_generator(id)) @app.websocket("/work") -async def work(websocket: fastapi.WebSocket): +async def work(websocket: fastapi.WebSocket, chat_repository: ChatRepository = Depends(create_chat_repository)): await websocket.accept() worker_config = inference.WorkerConfig.parse_raw(await websocket.receive_text()) try: while True: - print(websocket.client_state) if websocket.client_state == fastapi.websockets.WebSocketState.DISCONNECTED: logger.warning("Worker disconnected") break # find a pending task that matches the worker's config # could also be implemented using task queues # but general compatibility matching is tricky - for chat in CHATS.values(): - if (request := 
chat.pending_message_request) is not None: - if chat.message_request_state == MessageRequestState.pending: - if request.compatible_with(worker_config): - break + for chat in chat_repository.get_pending_chats(): + request = chat.pending_message_request + if request.compatible_with(worker_config): + break else: - logger.debug("No pending tasks") await asyncio.sleep(1) continue - chat.message_request_state = MessageRequestState.in_progress + chat_repository.set_chat_state(chat.id, interface.MessageRequestState.in_progress) work_request = inference.WorkRequest( conversation=chat.conversation, @@ -183,29 +187,39 @@ async def work(websocket: fastapi.WebSocket): max_new_tokens=request.max_new_tokens, ) - logger.info(f"Created {work_request}") + logger.info(f"Created {work_request=}") try: await websocket.send_text(work_request.json()) except websockets.exceptions.ConnectionClosedError: logger.warning("Worker disconnected") websocket.close() - chat.message_request_state = MessageRequestState.pending + chat_repository.set_chat_state(chat.id, interface.MessageRequestState.pending) break + logger.debug(f"Sent {work_request=} to worker.") + try: + in_progress = False while True: # maybe unnecessary to parse and re-serialize # could just pass the raw string and mark end via empty string response_packet = inference.WorkResponsePacket.parse_raw(await websocket.receive_text()) + in_progress = True await redisClient.rpush(chat.id, response_packet.json()) if response_packet.is_end: + logger.debug(f"Received {response_packet=} from worker. 
class ChatRepository:
    """Data-access helper for chats, operating on one ``sqlmodel.Session``.

    All mutating methods commit the session themselves.
    """

    def __init__(self, session: sqlmodel.Session) -> None:
        self.session = session

    @staticmethod
    def _mark_conversation_modified(chat: models.DbChatEntry) -> None:
        """Tell the ORM that ``chat.conversation`` was mutated in place.

        The column is a plain JSONB column, and the ORM does not detect
        in-place mutations of JSON values; without this flag an appended
        message would not be included in the UPDATE issued on commit.
        """
        from sqlalchemy.orm.attributes import flag_modified

        flag_modified(chat, "conversation")

    def get_chats(self) -> list[models.DbChatEntry]:
        """Return every chat row."""
        return self.session.exec(sqlmodel.select(models.DbChatEntry)).all()

    def get_pending_chats(self) -> list[models.DbChatEntry]:
        """Return chats that have a message request in the ``pending`` state."""
        return self.session.exec(
            sqlmodel.select(models.DbChatEntry).where(
                is_not(models.DbChatEntry.pending_message_request, None),
                models.DbChatEntry.message_request_state == interface.MessageRequestState.pending,
            )
        ).all()

    def get_chat_list(self) -> list[interface.ChatListEntry]:
        """Return all chats converted to their list-entry representation."""
        return [chat.to_list_entry() for chat in self.get_chats()]

    def get_chat_by_id(self, id: str) -> models.DbChatEntry:
        """Return the chat with the given id.

        Raises:
            fastapi.HTTPException: 404 if no chat with this id exists.
        """
        from sqlalchemy.exc import NoResultFound

        try:
            return self.session.exec(sqlmodel.select(models.DbChatEntry).where(models.DbChatEntry.id == id)).one()
        except NoResultFound as err:
            # An unknown id is a client error, not an internal server error.
            raise fastapi.HTTPException(status_code=404, detail="Chat not found") from err

    def get_chat_entry_by_id(self, id: str) -> interface.ChatEntry:
        """Return the chat with the given id as an ``interface.ChatEntry``."""
        return self.get_chat_by_id(id).to_entry()

    def create_chat(self) -> models.DbChatEntry:
        """Create, persist and return a new chat."""
        chat = models.DbChatEntry()
        self.session.add(chat)
        self.session.commit()
        return chat

    def add_prompter_message(self, id: str, message_request: interface.MessageRequest) -> None:
        """Append a prompter message to the chat and mark the request pending.

        Raises:
            fastapi.HTTPException: 404 if the chat does not exist; 400 if it is
                not the prompter's turn or a request is already pending.
        """
        logger.info(f"Adding prompter message {message_request} to chat {id}")
        chat = self.get_chat_by_id(id)
        if not chat.conversation.is_prompter_turn:
            raise fastapi.HTTPException(status_code=400, detail="Not your turn")
        if chat.pending_message_request is not None:
            raise fastapi.HTTPException(status_code=400, detail="Already pending")

        chat.conversation.messages.append(
            protocol.ConversationMessage(
                text=message_request.message,
                is_assistant=False,
            )
        )
        self._mark_conversation_modified(chat)

        chat.pending_message_request = message_request
        chat.message_request_state = interface.MessageRequestState.pending
        self.session.commit()
        logger.debug(f"Added prompter message {message_request} to chat {id}")

    def add_assistant_message(self, id: str, text: str) -> None:
        """Append an assistant message to the chat and clear the pending request.

        Raises:
            fastapi.HTTPException: 404 if the chat does not exist.
        """
        logger.info(f"Adding assistant message {text} to chat {id}")
        chat = self.get_chat_by_id(id)
        chat.conversation.messages.append(
            protocol.ConversationMessage(
                text=text,
                is_assistant=True,
            )
        )
        self._mark_conversation_modified(chat)
        chat.pending_message_request = None
        self.session.commit()
        logger.debug(f"Added assistant message {text} to chat {id}")

    def set_chat_state(self, id: str, state: interface.MessageRequestState) -> None:
        """Set the message-request state of the chat and commit.

        Raises:
            fastapi.HTTPException: 404 if the chat does not exist.
        """
        logger.info(f"Setting chat {id} state to {state}")
        chat = self.get_chat_by_id(id)
        chat.message_request_state = state
        self.session.commit()
        logger.debug(f"Set chat {id} state to {state}")
/dev/null +++ b/inference/server/oasst_inference_server/database.py @@ -0,0 +1,41 @@ +import json + +import pydantic.json +import sqlmodel +from loguru import logger +from oasst_inference_server import models +from oasst_inference_server.settings import settings + + +def default_json_serializer(obj): + class_name = obj.__class__.__name__ + encoded = pydantic.json.pydantic_encoder(obj) + encoded["_classname_"] = class_name + return encoded + + +def custom_json_serializer(obj): + return json.dumps(obj, default=default_json_serializer) + + +def custom_json_deserializer(s): + d = json.loads(s) + if not isinstance(d, dict): + return d + match d.get("_classname_"): + case "Conversation": + return models.protocol.Conversation.parse_obj(d) + case "MessageRequest": + return models.interface.MessageRequest.parse_obj(d) + case None: + return d + case _: + logger.error(f"Unknown class {d['_classname_']}") + raise ValueError(f"Unknown class {d['_classname_']}") + + +db_engine = sqlmodel.create_engine( + settings.database_uri, + json_serializer=custom_json_serializer, + json_deserializer=custom_json_deserializer, +) diff --git a/inference/server/oasst_inference_server/interface.py b/inference/server/oasst_inference_server/interface.py new file mode 100644 index 00000000..7fecffa1 --- /dev/null +++ b/inference/server/oasst_inference_server/interface.py @@ -0,0 +1,41 @@ +import enum + +import pydantic +from oasst_shared.schemas import inference, protocol + + +class MessageRequest(pydantic.BaseModel): + message: str = pydantic.Field(..., repr=False) + model_name: str = "distilgpt2" + max_new_tokens: int = 100 + + def compatible_with(self, worker_config: inference.WorkerConfig) -> bool: + return self.model_name == worker_config.model_name + + +class TokenResponseEvent(pydantic.BaseModel): + token: inference.TokenResponse + + +class MessageRequestState(str, enum.Enum): + pending = "pending" + in_progress = "in_progress" + complete = "complete" + aborted_by_worker = 
"aborted_by_worker" + + +class CreateChatRequest(pydantic.BaseModel): + pass + + +class ChatListEntry(pydantic.BaseModel): + id: str + + +class ChatEntry(pydantic.BaseModel): + id: str + conversation: protocol.Conversation + + +class ListChatsResponse(pydantic.BaseModel): + chats: list[ChatListEntry] diff --git a/inference/server/oasst_inference_server/models.py b/inference/server/oasst_inference_server/models.py new file mode 100644 index 00000000..f1a32438 --- /dev/null +++ b/inference/server/oasst_inference_server/models.py @@ -0,0 +1,23 @@ +from uuid import uuid4 + +import sqlalchemy as sa +import sqlalchemy.dialects.postgresql as pg +from oasst_inference_server import interface +from oasst_shared.schemas import protocol +from sqlmodel import Field, SQLModel + + +class DbChatEntry(SQLModel, table=True): + __tablename__ = "chat" + + id: str = Field(default_factory=lambda: str(uuid4()), primary_key=True) + + conversation: protocol.Conversation = Field(default_factory=protocol.Conversation, sa_column=sa.Column(pg.JSONB)) + pending_message_request: interface.MessageRequest | None = Field(None, sa_column=sa.Column(pg.JSONB)) + message_request_state: interface.MessageRequestState | None = Field(None, sa_column=sa.Column(pg.JSONB)) + + def to_list_entry(self) -> interface.ChatListEntry: + return interface.ChatListEntry(id=self.id) + + def to_entry(self) -> interface.ChatEntry: + return interface.ChatEntry(id=self.id, conversation=self.conversation) diff --git a/inference/server/oasst_inference_server/settings.py b/inference/server/oasst_inference_server/settings.py new file mode 100644 index 00000000..e0a4d914 --- /dev/null +++ b/inference/server/oasst_inference_server/settings.py @@ -0,0 +1,38 @@ +from typing import Any + +import pydantic + + +class Settings(pydantic.BaseSettings): + redis_host: str = "localhost" + redis_port: int = 6379 + redis_db: int = 0 + + sse_retry_timeout: int = 15000 + update_alembic: bool = True + alembic_retries: int = 5 + 
alembic_retry_timeout: int = 1 + + postgres_host: str = "localhost" + postgres_port: str = "5432" + postgres_user: str = "postgres" + postgres_password: str = "postgres" + postgres_db: str = "postgres" + + database_uri: str | None = None + + @pydantic.validator("database_uri", pre=True) + def assemble_db_connection(cls, v: str | None, values: dict[str, Any]) -> Any: + if isinstance(v, str): + return v + return pydantic.PostgresDsn.build( + scheme="postgresql", + user=values.get("postgres_user"), + password=values.get("postgres_password"), + host=values.get("postgres_host"), + port=values.get("postgres_port"), + path=f"/{values.get('postgres_db') or ''}", + ) + + +settings = Settings() diff --git a/inference/server/requirements.txt b/inference/server/requirements.txt index e0a00339..fdc60ccd 100644 --- a/inference/server/requirements.txt +++ b/inference/server/requirements.txt @@ -1,6 +1,10 @@ +alembic fastapi[all] loguru +prometheus-fastapi-instrumentator +psycopg2-binary pydantic redis +sqlmodel sse-starlette websockets diff --git a/inference/text-client/__main__.py b/inference/text-client/__main__.py index 4a7fa110..4a0d9e47 100644 --- a/inference/text-client/__main__.py +++ b/inference/text-client/__main__.py @@ -1,6 +1,7 @@ """Simple REPL frontend.""" import json +import time import requests import sseclient @@ -12,28 +13,37 @@ app = typer.Typer() @app.command() def main(backend_url: str = "http://127.0.0.1:8000"): """Simple REPL client.""" - chat_id = requests.post(f"{backend_url}/chat", json={}).json()["id"] while True: - message = typer.prompt("User").strip() + try: + chat_id = requests.post(f"{backend_url}/chat", json={}).json()["id"] + typer.echo(f"Chat ID: {chat_id}") + while True: + message = typer.prompt("User").strip() - # wait for stream to be ready - # could implement a queue position indicator - # could be implemented with long polling - # but server load needs to be considered - response = requests.post( - f"{backend_url}/chat/{chat_id}/message", - 
json={"message": message}, - stream=True, - headers={"Accept": "text/event-stream"}, - ) - response.raise_for_status() + # wait for stream to be ready + # could implement a queue position indicator + # could be implemented with long polling + # but server load needs to be considered + response = requests.post( + f"{backend_url}/chat/{chat_id}/message", + json={"message": message}, + stream=True, + headers={"Accept": "text/event-stream"}, + ) + response.raise_for_status() - client = sseclient.SSEClient(response) - print("Assistant: ", end="", flush=True) - for event in client.events(): - data = json.loads(event.data) - print(data["token"]["text"], end="", flush=True) - print() + client = sseclient.SSEClient(response) + print("Assistant: ", end="", flush=True) + for event in client.events(): + data = json.loads(event.data) + print(data["token"]["text"], end="", flush=True) + print() + except typer.Abort: + typer.echo("Exiting...") + break + except Exception: + typer.echo("Error, restarting chat...") + time.sleep(1) if __name__ == "__main__": diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py index cea7f257..74cf4d1d 100644 --- a/inference/worker/__main__.py +++ b/inference/worker/__main__.py @@ -1,25 +1,22 @@ -import json - +import interface import rel import requests import sseclient -import typer +import utils import websocket from loguru import logger from oasst_shared.schemas import inference, protocol +from settings import settings -app = typer.Typer() +# touch -@app.command() -def main( - backend_url: str = "ws://localhost:8000", - model_name: str = "distilgpt2", - inference_server_url: str = "http://localhost:8001", -): +def main(): + utils.wait_for_inference_server(settings.inference_server_url) + def on_open(ws: websocket.WebSocket): logger.info("Connected to backend, sending config...") - worker_config = inference.WorkerConfig(model_name=model_name) + worker_config = inference.WorkerConfig(model_name=settings.model_id) 
ws.send(worker_config.json()) logger.info("Config sent, waiting for work...") @@ -43,19 +40,12 @@ def main( prompt = prefix + "\n".join(messages) + "\nAssistant:" + parameters = interface.GenerateStreamParameters.from_work_request(work_request) response = requests.post( - f"{inference_server_url}/generate_stream", + f"{settings.inference_server_url}/generate_stream", json={ "inputs": prompt, - "parameters": { - "max_new_tokens": work_request.max_new_tokens, - "do_sample": work_request.do_sample, - "top_k": work_request.top_k, - "top_p": work_request.top_p, - "temperature": work_request.temperature, - "seed": work_request.seed, - # "stop": ["\nUser:", "\nAssistant:"], # TODO: make this a bit more workable because it's mutliple tokens - }, + "parameters": parameters.dict(), }, stream=True, headers={"Accept": "text/event-stream"}, @@ -68,29 +58,39 @@ def main( return client = sseclient.SSEClient(response) + stream_response = None + token_buffer = utils.TokenBuffer(stop_sequences=parameters.stop) for event in client.events(): logger.debug(f"Received event: {event}") - data = json.loads(event.data) - if data["generated_text"]: - break - token = data["token"] + stream_response = interface.GenerateStreamResponse.parse_raw(event.data) + token = stream_response.token + for send_token in token_buffer.add(token): + ws.send( + inference.WorkResponsePacket( + token=send_token.to_token_response(), + ).json() + ) + if stream_response is None: + logger.error("No stream response received") + return + + for send_token in token_buffer.finish(reason=stream_response.details.finish_reason): ws.send( inference.WorkResponsePacket( - token=inference.TokenResponse( - text=token["text"], - log_prob=token["logprob"], - token_id=token["id"], - ) + token=send_token.to_token_response(), ).json() ) + ws.send( inference.WorkResponsePacket( is_end=True, generated_text=inference.GeneratedTextResponse( - text=data["generated_text"], + text=stream_response.generated_text, + 
finish_reason=stream_response.details.finish_reason, ), ).json() ) + logger.info("Work complete. Waiting for more work...") def on_error(ws: websocket.WebSocket, error: Exception): try: @@ -102,7 +102,7 @@ def main( logger.warning(f"Connection closed: {close_status_code=} {close_msg=}") ws = websocket.WebSocketApp( - f"{backend_url}/work", + f"{settings.backend_url}/work", on_message=on_message, on_error=on_error, on_close=on_close, @@ -115,4 +115,4 @@ def main( if __name__ == "__main__": - app() + main() diff --git a/inference/worker/interface.py b/inference/worker/interface.py new file mode 100644 index 00000000..06a3eac6 --- /dev/null +++ b/inference/worker/interface.py @@ -0,0 +1,56 @@ +from typing import Literal + +import pydantic +from oasst_shared.schemas import inference + + +class GenerateStreamParameters(pydantic.BaseModel): + max_new_tokens: int | None + do_sample: bool | None + top_k: int | None + top_p: float | None + temperature: float | None + repetition_penalty: float | None + seed: int | None + stop: list[str] = ["\nUser:", "\nAssistant:"] # TODO: make this a bit more workable because it's mutliple tokens + details: bool = True + + @staticmethod + def from_work_request(work_request: inference.WorkRequest) -> "GenerateStreamParameters": + return GenerateStreamParameters( + max_new_tokens=work_request.max_new_tokens, + do_sample=work_request.do_sample, + top_k=work_request.top_k, + top_p=work_request.top_p, + temperature=work_request.temperature, + repetition_penalty=work_request.repetition_penalty, + seed=work_request.seed, + ) + + +class Token(pydantic.BaseModel): + text: str + logprob: float + id: int + + def __len__(self) -> int: + return len(self.text) + + def to_token_response(self) -> inference.TokenResponse: + return inference.TokenResponse( + text=self.text, + log_prob=self.logprob, + token_id=self.id, + ) + + +class StreamDetails(pydantic.BaseModel): + generated_tokens: int + seed: int | None + finish_reason: Literal["length", "eos_token", 
"stop_sequence"] + + +class GenerateStreamResponse(pydantic.BaseModel): + token: Token + generated_text: str | None + details: StreamDetails | None diff --git a/inference/worker/requirements.txt b/inference/worker/requirements.txt index 82169379..dd35a5a6 100644 --- a/inference/worker/requirements.txt +++ b/inference/worker/requirements.txt @@ -1,6 +1,6 @@ loguru +pydantic rel requests sseclient-py -typer websocket-client diff --git a/inference/worker/settings.py b/inference/worker/settings.py new file mode 100644 index 00000000..c726479c --- /dev/null +++ b/inference/worker/settings.py @@ -0,0 +1,10 @@ +import pydantic + + +class Settings(pydantic.BaseSettings): + backend_url: str = "ws://localhost:8000" + model_id: str = "distilgpt2" + inference_server_url: str = "http://localhost:8001" + + +settings = Settings() diff --git a/inference/worker/utils.py b/inference/worker/utils.py new file mode 100644 index 00000000..414b6958 --- /dev/null +++ b/inference/worker/utils.py @@ -0,0 +1,62 @@ +import collections +import random +import time +from typing import Literal + +import interface +import requests +from loguru import logger + + +class TokenBuffer: + def __init__(self, stop_sequences: list[str]) -> None: + self.stop_sequences = stop_sequences + self.longest_stop_len = max((len(stop) for stop in stop_sequences), default=0) + self.tokens = collections.deque() + self.token_lens = collections.deque() + self.total_len = 0 + + def add(self, token: interface.Token): + self.tokens.append(token) + self.token_lens.append(len(token)) + self.total_len += len(token) + while True: + if not self.tokens: + break + head_len = self.token_lens[0] + if self.total_len - head_len >= self.longest_stop_len: + token = self.tokens.popleft() + self.token_lens.popleft() + self.total_len -= head_len + yield token + else: + break + + def finish(self, reason: Literal["length", "eos_token", "stop_sequence"]): + if reason == "stop_sequence": + end_sequence = "" + while self.tokens: + end_sequence 
= self.tokens.pop().text + end_sequence + if end_sequence in self.stop_sequences: + break + yield from self.tokens + else: + yield from self.tokens + + +def wait_for_inference_server(inference_server_url: str, timeout: int = 600): + health_url = f"{inference_server_url}/health" + time_limit = time.time() + timeout + while True: + try: + response = requests.get(health_url) + response.raise_for_status() + except (requests.HTTPError, requests.ConnectionError): + if time.time() > time_limit: + raise + sleep_duration = random.uniform(0, 10) + logger.warning(f"Inference server not ready. Retrying in {sleep_duration} seconds") + time.sleep(sleep_duration) + else: + logger.info("Inference server is ready") + break diff --git a/inference/worker/worker_full_main.sh b/inference/worker/worker_full_main.sh new file mode 100755 index 00000000..c3c7788e --- /dev/null +++ b/inference/worker/worker_full_main.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +text-generation-launcher & + +/opt/miniconda/envs/worker/bin/python /worker & + +wait -n + +exit $? diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md index 655d6469..57e27483 100644 --- a/model/reward/instructor/README.md +++ b/model/reward/instructor/README.md @@ -2,20 +2,22 @@ Trainer code based on huggingface. Compatible with deepspeed or accelerate -Requirements - -``` -wandb -evaluate -datasets -transformers -torch==1.12 -``` - -Start training reward model +Install Python requirements ```bash -python trainer.py configs/electra-base-dis-webgpt.yml +pip install -r requirements.txt +``` + +Write or inherit a `configs/<config-name>.yml` file to store training +configuration details.
+ +> The configuration file must have _at least_ all the keys present in +> [`configs/dummy.yml`](configs/dummy.yml) + +Run training procedure + +```bash +python trainer.py configs/<config-name>.yml ``` Additional axis labeling, this outputs a 4 summary quality evaluation metrics diff --git a/model/reward/instructor/configs/dummy.yml b/model/reward/instructor/configs/dummy.yml new file mode 100644 index 00000000..6c2c9619 --- /dev/null +++ b/model/reward/instructor/configs/dummy.yml @@ -0,0 +1,21 @@ +model_name: X +tokenizer_name: X +max_length: X +num_train_epochs: X +warmup_steps: X +scheduler: X +learning_rate: X +deepspeed: X +fp16: X +local_rank: X +gradient_checkpointing: X +gradient_accumulation_steps: X +per_device_train_batch_size: X +per_device_eval_batch_size: X +weight_decay: X +max_grad_norm: X +eval_steps: X +save_steps: X +wandb_entity: X +datasets: + - X diff --git a/model/supervised_finetuning/README.md b/model/supervised_finetuning/README.md index d5b10e01..387e91e4 100644 --- a/model/supervised_finetuning/README.md +++ b/model/supervised_finetuning/README.md @@ -1,62 +1,18 @@ # Train using supervised examples -Requirements +## Requirements -``` -wandb -evaluate -datasets -transformers -torch -``` +`pip install -r requirements.txt` -Start training reward model +Start training SFT model ```bash -python trainer.py --configs defaults galactica-125 +python trainer.py --configs defaults galactica-125m ``` -## Dataset - -For now we only support webgpt and summary dataset from OpenAI. Once -open-asisstant dataset are available it will be added here.
- -## Model - -Normally you should be able to add new models in configs/config.yml - -``` -your-model-name: - learning_rate: 2e-6 - model_name: - weight_decay: 0.01 - max_length: 812 - warmup_steps: 600 - gradient_checkpointing: false - gradient_accumulation_steps: 5 - per_device_train_batch_size: 4 - per_device_eval_batch_size: 4 -``` - -``` -python trainer.py --configs defaults your-model-name -``` - -However, if the model of your choice doesn't have pad_token, eos_token, -sep_token, you have to update utils.py `get_tokenizer` to use the right token. - -## Deepspeed support - -You can edit the configs/zero_config.json and use any stage you wish. The -current config uses zero-stage 3. For more details on how to setup the config -checkout [this page](https://www.deepspeed.ai/tutorials/zero/) - -Once you are satisfy with your deepzero config, you can add --deepspeed flag at -the end to trigger deepspeed - -``` -python trainer.py --configs defaults your-model-name --deepspeed -``` +For `wandb`: update the `entity` argument in `trainer.py`'s call to `wandb.init` +to be your Weights & Biases username per +[docs](https://docs.wandb.ai/ref/python/init). ## Dataset choices @@ -80,6 +36,74 @@ Currently only these languages are supported via prompt translation: ar,de,fr,en,it,nl,tr,ru,ms,ko,ja,zh ``` +## Dataset sub-sampling + +We can subsample the **training** data by passing either the `fraction` or +`size` argument in the `configs/config.yaml` file. Don't forget the additional +colon ":" after the dataset name when doing this. + +Example: + +``` + datasets: + - webgpt: + fraction : 0.05 + - prompt_dialogue: + size : 500 + - adversarial_qa + - trivia_qa_nocontext +``` + +In this example, per epoch we will use: + +- A random 5% of `webgpt`; +- A random 500 examples from `prompt_dialogue`; +- All examples from datasets for which we don't specify the `fraction` or `size` + argument.
+ +In the above example, per epoch we'll use a different 5% from `webgpt` and a +different 500 examples from `prompt_dialogue`. + +This works with `torch.distributed`. + +## Model + +Normally you should be able to add new models in `configs/config.yaml` + +``` +your-model-name: + learning_rate: 2e-6 + model_name: + weight_decay: 0.01 + max_length: 812 + warmup_steps: 600 + gradient_checkpointing: false + gradient_accumulation_steps: 5 + per_device_train_batch_size: 4 + per_device_eval_batch_size: 4 +``` + +``` +python trainer.py --configs defaults your-model-name +``` + +However, if the model of your choice doesn't have `pad_token`, `eos_token`, +`sep_token`, you have to update `get_tokenizer` in `utils.py` to use the right +token. + +## Deepspeed support + +You can edit the configs/zero_config.json and use any stage you wish. The +current config uses zero-stage 3. For more details on how to set up the config, +check out [this page](https://www.deepspeed.ai/tutorials/zero/). + +Once you are satisfied with your deepzero config, you can add the --deepspeed flag at +the end to trigger deepspeed + +``` +python trainer.py --configs defaults your-model-name --deepspeed +``` + ## Results Experimental results in wandb @@ -87,7 +111,7 @@ Experimental results in wandb ## TODOS -- decide on a model +- Decide on a model - Merge utils etc with reward model - Casual Modelling for GPT-JT does not leverage the bidirectional mask for the prompt?
(https://huggingface.co/togethercomputer/GPT-JT-6B-v1) diff --git a/model/supervised_finetuning/configs/config.yaml b/model/supervised_finetuning/configs/config.yaml index d70fad41..79e4751d 100644 --- a/model/supervised_finetuning/configs/config.yaml +++ b/model/supervised_finetuning/configs/config.yaml @@ -17,7 +17,7 @@ defaults: freeze_layer: datasets: - webgpt - - prompt_dialogue + # - prompt_dialogue - squad_v2 - adversarial_qa - trivia_qa_nocontext diff --git a/model/supervised_finetuning/custom_datasets/qa_datasets.py b/model/supervised_finetuning/custom_datasets/qa_datasets.py index d88b741d..cd4d742a 100644 --- a/model/supervised_finetuning/custom_datasets/qa_datasets.py +++ b/model/supervised_finetuning/custom_datasets/qa_datasets.py @@ -222,7 +222,7 @@ class SODA(Dataset): return pairs - def __init__(self, cache_dir, max_sample_size=10000, input_max_length=1024) -> None: + def __init__(self, cache_dir, input_max_length=1024) -> None: super().__init__() self.pairs = [] @@ -233,9 +233,6 @@ class SODA(Dataset): if len(prompt) < input_max_length: self.pairs.append((prompt, answer)) - if len(self.pairs) > max_sample_size: - break - def __len__(self): return len(self.pairs) diff --git a/model/supervised_finetuning/requirements.txt b/model/supervised_finetuning/requirements.txt index 8f8cc63c..95e5a472 100644 --- a/model/supervised_finetuning/requirements.txt +++ b/model/supervised_finetuning/requirements.txt @@ -4,7 +4,6 @@ datasets==2.8.0 deepspeed==0.7.7 evaluate==0.4.0 gdown -mpi4py==3.1.4 nltk==3.8.1 numpy>=1.22.4 py7zr @@ -12,3 +11,4 @@ PyYAML>=6.0 scikit_learn==1.2.0 torch>=1.11.0 transformers==4.25.1 +wandb diff --git a/model/supervised_finetuning/tests/test_utils.py b/model/supervised_finetuning/tests/test_utils.py index ad40e534..c4982024 100644 --- a/model/supervised_finetuning/tests/test_utils.py +++ b/model/supervised_finetuning/tests/test_utils.py @@ -1,9 +1,28 @@ from argparse import Namespace -from utils import get_tokenizer +import pytest +from 
utils import TOKENIZER_CONFIGS, get_tokenizer, match_tokenizer_name def test_tokenizer(): get_tokenizer(Namespace(model_name="Salesforce/codegen-2B-multi", cache_dir=".cache")) get_tokenizer(Namespace(model_name="facebook/galactica-1.3b", cache_dir=".cache")) get_tokenizer(Namespace(model_name="", cache_dir=".cache")) + + +def test_tokenizer_successful_match(): + for config_name, config in TOKENIZER_CONFIGS.items(): + found_config = match_tokenizer_name(config_name) + assert found_config == config + + +def test_tokenizer_partial_match(): + for config_name in ["facebook/galactica-1.3b", "togethercomputer/GPT-JT-6B-v1", "Salesforce/codegen-2B-multi"]: + found_config = match_tokenizer_name(config_name) + assert found_config + + +def test_tokenizer_failed_match(): + for fake_config_name in ["not-a-model", "fake"]: + with pytest.raises(ValueError): + match_tokenizer_name(fake_config_name) diff --git a/model/supervised_finetuning/trainer.py b/model/supervised_finetuning/trainer.py index 479ffc7f..02367814 100644 --- a/model/supervised_finetuning/trainer.py +++ b/model/supervised_finetuning/trainer.py @@ -1,7 +1,7 @@ import argparse from distutils.util import strtobool from functools import partial -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import bitsandbytes import datasets @@ -14,7 +14,7 @@ from transformers.trainer_pt_utils import IterableDatasetShard from transformers.trainer_utils import seed_worker from transformers.training_args import OptimizerNames from transformers.utils import is_datasets_available -from utils import get_dataset, get_loss, get_metrics, get_model, get_tokenizer, read_yamls +from utils import PerDatasetSampler, get_dataset, get_loss, get_metrics, get_model, get_tokenizer, read_yamls def compute_metrics(eval_pred, preprocess_fns, metrics): @@ -36,7 +36,7 @@ class SFTTrainer(Trainer): self, model: Union[PreTrainedModel, nn.Module] = None, args: TrainingArguments 
= None, - train_collate_fn: Callable = None, + sampler: torch.utils.data.sampler.Sampler = None, loss_function: str = "CrossEntropyLoss", poly_eps: float = 1.0, **kwargs, @@ -45,6 +45,7 @@ class SFTTrainer(Trainer): self.train_collate_fn = train_collate_fn # By default CrossEntropyLoss ignores padding_index -100, but just in case use our own loss_fct self.loss_fct = get_loss(loss_function, poly_eps) + self.sampler = sampler def compute_loss(self, model, inputs, return_outputs=False): labels_mask = inputs.pop("label_masks") @@ -95,24 +96,22 @@ class SFTTrainer(Trainer): return (loss, logits, labels) - def get_train_dataloader(self) -> DataLoader: + def get_train_dataloader(self): """ - Returns the training [`~torch.utils.data.DataLoader`]. - Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed - training if necessary) otherwise. - Subclass and override this method if you want to inject some custom behavior. - """ - if self.train_dataset is None: - raise ValueError("Trainer: training requires a train_dataset.") + Inject custom data sampling behaviour into training loop + and use custom task mixing collate function : train_collate_fn - train_dataset = self.train_dataset + rewrite from: + https://github.com/huggingface/transformers/blob/67d074874d285e616393c65a0e670088e1b6b74a/src/transformers/trainer.py#L846 + """ data_collator = self.train_collate_fn + train_dataset = self.train_dataset if is_datasets_available() and isinstance(train_dataset, datasets.Dataset): train_dataset = self._remove_unused_columns(train_dataset, description="training") - else: - data_collator = self._get_collator_with_removed_columns(data_collator, description="training") if isinstance(train_dataset, torch.utils.data.IterableDataset): + # if we are using iterable dataset it means no weight sampling + # added for backward compat if self.args.world_size > 1: train_dataset = IterableDatasetShard( train_dataset, @@ -121,7 +120,6 @@ class 
SFTTrainer(Trainer): num_processes=self.args.world_size, process_index=self.args.process_index, ) - return DataLoader( train_dataset, batch_size=self.args.per_device_train_batch_size, @@ -129,8 +127,10 @@ class SFTTrainer(Trainer): num_workers=self.args.dataloader_num_workers, pin_memory=self.args.dataloader_pin_memory, ) - - train_sampler = self._get_train_sampler() + if self.sampler is None: + train_sampler = self._get_train_sampler() + else: + train_sampler = self.sampler return DataLoader( train_dataset, @@ -194,10 +194,9 @@ if __name__ == "__main__": tokenizer = get_tokenizer(training_conf) model = get_model(training_conf, tokenizer) - train, evals, collate_fn, train_collate_fn = get_dataset(training_conf, tokenizer) + sampler = PerDatasetSampler.build_sampler_from_config(training_conf, train.datasets) metrics, preprocess_fns = get_metrics(training_conf, tokenizer) - optimizer = OptimizerNames.ADAMW_BNB if training_conf.quantization else OptimizerNames.ADAMW_HF if training_conf.quantization: @@ -235,7 +234,6 @@ if __name__ == "__main__": ) assert len(evals) > 0 - if not training_conf.deepspeed or training_conf.local_rank == 0: import wandb @@ -246,8 +244,9 @@ if __name__ == "__main__": ) trainer = SFTTrainer( - model, - args, + model=model, + args=args, + sampler=sampler, train_collate_fn=train_collate_fn, loss_function=training_conf.loss_fn, poly_eps=training_conf.poly_eps, diff --git a/model/supervised_finetuning/utils.py b/model/supervised_finetuning/utils.py index bd7ae871..86e48a52 100644 --- a/model/supervised_finetuning/utils.py +++ b/model/supervised_finetuning/utils.py @@ -1,11 +1,8 @@ -# from functools import partial +import random from pathlib import Path -from typing import NamedTuple +from typing import List, NamedTuple import evaluate - -# import nltk -# import numpy as np import transformers import yaml from custom_datasets import get_one_dataset @@ -15,6 +12,79 @@ from losses import CrossEntropyLoss, PolyLoss from models import 
freeze_top_n_layers, get_specific_model from sklearn.model_selection import train_test_split from torch.utils.data import ConcatDataset, Subset +from torch.utils.data.sampler import Sampler + + +class PerDatasetSampler(Sampler): + """Sampler which returns a fixed number of samples per dataset, per epoch. + + Example: + + Dataset 1 has 10,000 examples and we want 200 per epoch + Dataset 2 has 500 examples and we want all 500 per epoch + + Epoch size will be 700 and every epoch we'll sample a different + 200 from dataset 1. + + Parameters + ---------- + dataset_sizes : List[int] + A list with the size of each dataset. + dataset_size_per_epoch : List[int] + How many examples to get from each dataset per epoch. + + Note: dataset_sizes & dataset_size_per_epoch must be in the same order. + Further, the examples in the underlying torch.utils.data.Dataset + must be ordered as dataset_1, dataset_2, ..., dataset_n. This is fine + if we concatenate a bunch of datasets together + e.g. using torch.utils.data.ConcatDataset which is current behaviour.
+ """ + + def __init__(self, dataset_sizes: List[int], dataset_size_per_epoch: List[int]): + self.dataset_sizes = dataset_sizes + self.dataset_size_per_epoch = dataset_size_per_epoch + self.num_datasets = len(dataset_sizes) + + def __iter__(self): + epoch_idx = [] + n = 0 + for i in range(self.num_datasets): + sampled_idx = random.sample(range(n, self.dataset_sizes[i] + n), self.dataset_size_per_epoch[i]) + n += self.dataset_sizes[i] + epoch_idx.extend(sampled_idx) + random.shuffle(epoch_idx) + return iter(epoch_idx) + + def __len__(self): + return int(sum(self.dataset_size_per_epoch)) + + @classmethod + def build_sampler_from_config(cls, training_conf, datasets): + dataset_sizes = [len(x) for x in datasets] + fractions = get_dataset_fractions(training_conf.datasets, dataset_sizes) + dataset_size_per_epoch = [int(size * frac) for size, frac in zip(dataset_sizes, fractions)] + return cls(dataset_sizes, dataset_size_per_epoch) + + +def get_dataset_fractions(conf, dataset_sizes): + """Calculate fraction of each dataset to use per epoch when subsampling""" + fractions = [] + for i, data_config in enumerate(conf): + dataset_name = get_dataset_name_from_data_config(data_config) + if isinstance(data_config, dict): + if "fraction" in data_config[dataset_name]: + if data_config[dataset_name]["fraction"] <= 0: + raise ValueError("Please specify fraction as a value between 0 < fraction <= 1") + fractions.append(min(1, data_config[dataset_name]["fraction"])) + elif "size" in data_config[dataset_name]: + if data_config[dataset_name]["size"] > dataset_sizes[i]: + raise ValueError(f"Please specify a size smaller than number of examples: {dataset_sizes[i]:,.0f}") + fractions.append(data_config[dataset_name]["size"] / dataset_sizes[i]) + else: + raise ValueError("Please specify either fraction or size in config.yaml. 
See README for instructions.") + else: + fractions.append(1) + return fractions class SpecialTokens(NamedTuple): @@ -36,7 +106,10 @@ TOKENIZER_CONFIGS = { def match_tokenizer_name(model_name: str) -> TokenizerConfig: - """Match a partial model name to a tokenizer configuration""" + """ + Match a partial model name to a tokenizer configuration + e.g. model_name `Salesforce/codegen-2B-multi` has config name `codegen` + """ tokenizer_config_matches = [config for name, config in TOKENIZER_CONFIGS.items() if name in model_name] if not tokenizer_config_matches: raise ValueError(f"Cannot find any tokeniser configuration to match {model_name=}") @@ -140,10 +213,17 @@ def get_model(conf, tokenizer): return model +def get_dataset_name_from_data_config(data_config): + if isinstance(data_config, dict): + return list(data_config.keys())[0] + return data_config + + def get_dataset(conf, tokenizer): train_datasets, evals = [], {} - for dataset_name in conf.datasets: + for data_config in conf.datasets: + dataset_name = get_dataset_name_from_data_config(data_config) train, val = get_one_dataset(conf, dataset_name) train_datasets.append(train) evals[dataset_name] = Subset(val, list(range(min(len(val), conf.eval_size)))) if conf.eval_size else val diff --git a/notebooks/data-augmentation/movie-dialogs/README.md b/notebooks/data-augmentation/movie-dialogs/README.md new file mode 100644 index 00000000..9f128b6e --- /dev/null +++ b/notebooks/data-augmentation/movie-dialogs/README.md @@ -0,0 +1,42 @@ +## Dataset Summary + +The dataset was created using +[Cornell Movies Dialog Corpus](https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html) +which contains a large metadata-rich collection of fictional conversations +extracted from raw movie scripts. Dialogs and meta-data from the underlying +Corpus were used to design a dataset that can be used to train InstructGPT-based +models to learn movie scripts.
+ +Example : + +``` +User: Assume RICK and ALICE are characters from a fantasy-horror movie, continue the conversation between them + RICK: I heard you screaming. Was it a bad one? + ALICE: It was bad. + RICK: Doesn't the dream master work for you anymore? +Assistant: Sure + ALICE: I can't find him. + RICK: Hey, since when do you play Thomas Edison? This looks like Sheila's. + ALICE: It is...was. It's a zapper, it might help me stay awake. + RICK: Yeah, or turn you into toast. +``` + +## Usage + +```python + +from datasets import load_dataset +dataset = load_dataset("shahules786/OA-cornell-movies-dialog") +``` + +## Citations + +``` +@InProceedings{Danescu-Niculescu-Mizil+Lee:11a, + author={Cristian Danescu-Niculescu-Mizil and Lillian Lee}, + title={Chameleons in imagined conversations: + A new approach to understanding coordination of linguistic style in dialogs.}, + booktitle={Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics, ACL 2011}, + year={2011} +} +``` diff --git a/notebooks/data-augmentation/movie-dialogs/convert-to-instruction-format.ipynb b/notebooks/data-augmentation/movie-dialogs/convert-to-instruction-format.ipynb new file mode 100644 index 00000000..9d82bdaf --- /dev/null +++ b/notebooks/data-augmentation/movie-dialogs/convert-to-instruction-format.ipynb @@ -0,0 +1,649 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ec8d6189", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/movie-dialogs/convert-to-instruction-format.ipynb)" + ] + }, + { + "cell_type": "markdown", + "id": "493f2529", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "65a47f83", + "metadata": {}, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "import numpy as np\n", + "import json\n", + 
"from tqdm import tqdm\n", + "\n", + "IMDB = 7.0" + ] + }, + { + "cell_type": "markdown", + "id": "480440f6", + "metadata": {}, + "source": [ + "## Dialog templates\n", + "Templates for converting dialogs to prompts" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "fcfedd7f", + "metadata": {}, + "outputs": [], + "source": [ + "DIALOG_TEMPLATES = {\n", + " ### template for 4+ line dialogs\n", + " \"four_more_lines\": [\n", + " \"\"\"\n", + "Here's a {template} between {char1} and {char2} in a scene from a {genre} movie\n", + " {dialogue1}\n", + "User : Can you continue the {template}\n", + "Assistant : Sure, the next dialogue for this scene could be\n", + " {dialogue2}\n", + " \"\"\",\n", + " \"\"\"\n", + " {dialogue1}\n", + "User : Can you provide more dialog assuming {genre} movie\n", + " {dialogue2}\n", + "\"\"\",\n", + " \"\"\"\n", + "I'm trying to complete the dialog for my characters {char1} and {char2}. Here's the {template}, Please help me complete it\n", + " {dialogue1}\n", + "Assistant : Sure\n", + " {dialogue2}\n", + "\"\"\",\n", + " \"\"\"\n", + "User : Assume {char1} and {char2} are characters from a {genre} movie, continue the conversation between them\n", + " {dialogue1}\n", + "Assistant : Sure\n", + " {dialogue2}\n", + "\"\"\",\n", + " ],\n", + " ## template for 4 line dialogs\n", + " \"four_lines\": [\n", + " \"\"\"\n", + " {dialogue1}\n", + "User : provide a response assuming you're {char2}\n", + "Assistant : Sure\n", + " {dialogue2}\n", + "\"\"\",\n", + " \"\"\"\n", + " {dialogue1}\n", + "User : respond as {char2} to complete the conversation\n", + "Assistant : Sure\n", + " {dialogue2}\n", + "\"\"\",\n", + " ],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "2047056e", + "metadata": {}, + "source": [ + "- Download Cornell-movies dialog dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e413a053", + "metadata": {}, + "outputs": [], + "source": [ + "! 
wget wget https://zissou.infosci.cornell.edu/convokit/datasets/movie-corpus/movie-corpus.zip\n", + "! unzip movie-corpus.zip -d ./Data/" + ] + }, + { + "cell_type": "markdown", + "id": "5e2aab0d", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "25cae04e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_movie_dialogs():\n", + "\n", + " with open(\"./Data/movie-corpus/utterances.jsonl\", \"r\") as json_file:\n", + " conversations = list(json_file)\n", + " speakers = json.load(open(\"./Data/movie-corpus/speakers.json\"))\n", + " movie_dialog_dict = {}\n", + " for dialog in tqdm(conversations):\n", + " dialog = eval(dialog.replace(\"null\", \"None\"))\n", + " movie_dialog_dict[dialog[\"id\"]] = {\n", + " \"characterName\": speakers[dialog[\"speaker\"]][\"meta\"][\"character_name\"],\n", + " \"text\": dialog[\"text\"],\n", + " \"characterID\": dialog[\"speaker\"],\n", + " }\n", + "\n", + " return movie_dialog_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3b949bc7", + "metadata": {}, + "outputs": [], + "source": [ + "def get_dialogs(dialog_dict, start, end):\n", + "\n", + " dialog_list = []\n", + " for idx in range(start, end + 1):\n", + " dialog_list.append(dialog_dict[f\"L{idx}\"][\"characterName\"] + \": \" + dialog_dict[f\"L{idx}\"][\"text\"])\n", + " num_lines = len(dialog_list)\n", + "\n", + " assert num_lines >= 1, \"Number of lines should be greater than one\"\n", + "\n", + " if num_lines < 6:\n", + " dialog1 = \"\\n \".join(dialog_list[:-1])\n", + " dialog2 = dialog_list[-1]\n", + " else:\n", + " dialog_len = np.random.randint(3, (num_lines // 2) + 1)\n", + " dialog1 = \"\\n \".join(dialog_list[:dialog_len])\n", + " dialog2 = \"\\n \".join(dialog_list[dialog_len:])\n", + "\n", + " return dialog1, dialog2\n", + "\n", + "\n", + "def choose_prompt(num_lines):\n", + "\n", + " assert num_lines >= 1, \"Number of lines should be greater than one\"\n", + "\n", + " 
if num_lines < 6:\n", + " prompt = np.random.choice(DIALOG_TEMPLATES[\"four_lines\"])\n", + "\n", + " else:\n", + " prompt = np.random.choice(DIALOG_TEMPLATES[\"four_more_lines\"])\n", + "\n", + " return prompt\n", + "\n", + "\n", + "def convert_to_prompts(dataset, movie_dialog_dict, output_dir=\".\", split=\"train\"):\n", + "\n", + " with open(f\"{output_dir}/{split}.jsonl\", \"w\", encoding=\"utf8\") as output:\n", + "\n", + " i = 0\n", + " while i < len(dataset[\"train\"]):\n", + "\n", + " data = dataset[split][i]\n", + " if float(data[\"movieIMDBRating\"].strip()) >= IMDB:\n", + " max_lines = np.random.randint(7, 12)\n", + " lineids = [int(lineid[1:]) for lineid in data[\"utterance\"][\"LineID\"]]\n", + " num_lines = len(lineids)\n", + " char_ids = sorted([data[\"characterID1\"].strip(), data[\"characterID1\"].strip()])\n", + " while num_lines < max_lines:\n", + " i += 1\n", + " data = dataset[split][i]\n", + " char_id_new = sorted([data[\"characterID1\"].strip(), data[\"characterID1\"].strip()])\n", + " ## make sure that characters are the same\n", + " if char_id_new == char_ids:\n", + " lineids_new = [int(lineid[1:]) for lineid in data[\"utterance\"][\"LineID\"]]\n", + " if lineids_new[0] == (lineids[-1] + 1): ##ensure continuety\n", + " lineids.extend(lineids_new)\n", + " else:\n", + " break\n", + " else:\n", + " break\n", + " num_lines = len(lineids)\n", + "\n", + " genre = \"-\".join(data[\"movieGenres\"][:2])\n", + " template = np.random.choice([\"dialog\", \"script\", \"play\"])\n", + " char1 = movie_dialog_dict[f\"L{lineids[0]}\"][\"characterName\"]\n", + "\n", + " if num_lines < 6:\n", + " if num_lines % 2 == 0:\n", + " char2 = movie_dialog_dict[f\"L{lineids[1]}\"][\"characterName\"]\n", + " else:\n", + " char2 = char1\n", + " else:\n", + " char2 = movie_dialog_dict[f\"L{lineids[1]}\"][\"characterName\"]\n", + "\n", + " dialogue1, dialogue2 = get_dialogs(movie_dialog_dict, lineids[0], lineids[-1])\n", + " prompt = choose_prompt(num_lines)\n", + "\n", + 
" prompt = prompt.format(\n", + " char1=char1, char2=char2, dialogue1=dialogue1, dialogue2=dialogue2, genre=genre, template=template\n", + " )\n", + " output.write(f\"{json.dumps({'conversation': prompt})}\\n\")\n", + " i += 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3ff310fd", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|███████████████████████████████| 304713/304713 [00:54<00:00, 5628.12it/s]\n", + "Found cached dataset cornell_movie_dialog (/home/shahul/.cache/huggingface/datasets/cornell_movie_dialog/default/0.1.0/b67b3433cf894b551cddcd82efdff0826f39b39a11d5c149e746a546a8dc85f3)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6fee977c69a3403ebe77c4669fcb25d7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00 Thomas ponders what he has read as he looks to the right ; not a single car can be seen . The street appears infinite in length and the buildings fade in to the distance with it . He stands and begins his first step down the street . His movement halts when he hears a young voice behind him , “ You look thirsty mister . Would you like some lemonade ? ” Thomas walks back past the park entrance and over to the lemonade stand , wondering how he had not noticed it before . It is beautiful , the entrance ; but the park is closed now . Thomas stares up at the gates in awe . Thomas is interrupted again by the child , “ $ 5.50 , please. ” Thomas looks at the counter , flustered . “ I ’ ll have the punch instead. ” As the child pours the purple drink in to the cup , Thomas reaches in his pocket finding a five dollar bill and three quarters . “ Keep the change ” , Thomas says as he picks up his drink . Thomas sips and the sky slowly dims . He feels his breath drawn away from him as a comet sails over the park entrance . And Heaven ’ s Gate opens . 
|\n", + "|2|[ CW ] [ PM ] Write your hero into a corner , and let me get them out .|Bob dropped five of the Zeds , reloaded his Colt 45 , and ran up the stairs . He had someone currently upstairs , alerting Search and Rescue to find a place to land in this urban , industrial nightmare . They were currently in a truck depot , the places where goods would be transferred truck from truck . Already , some men defending the front door had been pulled in , causing the rest to fall back . The first , and only , line of physical defense , the hardened steel gates , created to stop robbers , were badly banged up , from the onslaught of fists against it . It was bad enough that the zombies managed to cram two at once inside the doorway , but losing the gates would mean that the horde would rush in . `` Hey ! '' Courtney rushed outside the communications office , her .22 rifle in hand . `` They 're at the trainstation , just a block from here ! '' `` It 's probably too late , mate . '' Bob said back , `` Just look at 'em ! '' The metal steps leading to the elevated walkway was a savior , only allowing one body to get in at a time . Unfortunately , our heroes had just fought their way here , from a few streets down . Seems easy ? Not when you have to take detours through heavily infested buildings because of blockades in the roads , or just the sheer number of walkers wouldn't 've allowed you to run through them . Bob 's equipped with a Colt 1911 .45 caliber pistol , excellent at punching through heads , but at the cost of heavy kickback . Also due to it 's temptingness , Bob has used all but three 7-round magazines . He has a knife , but who the hell would be able to take anyone out with that ? Courtney has her 10/22 Ruger Takedown . Initially intended for long range hunting , the rifle particularly excels at going through targets cleanly . The only disadvantage is the lack of stopping power . They have a fully gassed up FedEx truck at their disposal . 
A few men inside , surrounded , but armed , are ready to go when you tell them where they need to go . Around 31 zombies have gotten in already , with god knows how much outside .|\n", + "|3|[ cw ] write about the strangest/scariest/saddest dream you 've ever had in less than 200 words .|The night was as thick and terrifying as any I had ever seen before . All I could hear was the scream of the wind past my ears , the pounding of hooves , huffed horse breaths , and the pounding of my own heart . The woods were closeknit , and my path was barely visible , hidden under a thick layer of bracken . `` Faster , '' I whispered as I dug my heels in . Safety was close and yet so far away , calling to me . He would save me ; I knew it with all my heart . All I had to do was outrun the demons at my back first .|\n", + "\n", + "Just in case anyone wants the [prompt tag description](https://www.reddit.com/r/WritingPrompts/wiki/how_to_tag_prompts/).\n", + "\n", + "@ontocord , can you improve the issue details having the samples above, please?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. ontocord" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Interesting how the [XX] tags are used. I wasn't thinking about those.\n", + "\n", + "I was thinking of Instructions -> answers like \"User: write me a story about {stripped_prompt} -> Rosey: Sure, here's a story about {stripped_prompt}: {story}\"\n", + "where stripped_prompt removes things like \"write about\" \"in less than 200 words\", etc.\n", + "\n", + "And the inverse \"User: What is this story about {story} -> Rosey: I think it's about {striped_prompt}\"\n", + "\n", + "You could also do summarization of longer stories into 4 or 5 pointed sentences and ask for an outline. Or you could give an outline and ask Rosey to fill in the story.\n", + "\n", + "For the prompt tag, you could add constraings to the prompts based on the tag. 
So for [RF], you could add to the end of the actual instruciton: this story could {have happened before or should be able to happen in the real world to unknown people. Not what you think could happen in the future.}\n", + "\n", + "Lmk know if you need more input." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. ontocord" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Also these instructions:\n", + "“write a story about {prompt}, ending with the sentence {last_sentence}”. “write a story about {prompt}, where the beginning of the story is about {summary of the beginning part}”. “write a story about {prompt}, where the middle of the story is about {summary of the middle part}”. “write a story about {prompt}, where the end of the story is about {summary of the end part}”" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The goal of this task was to auto-generate question/answer samples from writingPrompts to feed openAssistant. To do that, we had to standardize the way a prompt is written. We chose to define prompt templates that make the generation process feasible. Here are the templates we applied:\n", + "\n", + "* Base template: every prompt would have this sample.\n", + "> User: write me a story about: {stripped_prompt} -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\n", + "\n", + "where `stripped_prompt` is the cleaned prompt, produced by a regex pattern that removes the parts of a prompt that would not fit the template. 
And `story` is the actual answer to a prompt.\n", + "\n", + "* General constraints: a prompt whose constraint was found by regex pattern would have this also.\n", + "> Base template, {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\n", + "\n", + "where `stripped_constraint` is the constraint found.\n", + "\n", + "* Answer beginning constraints: this constraint was imposed by the way the answer should start. \n", + "> Base template, starting with: {beginning} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: {beginning}:\\n{story}\n", + "\n", + "where `beginning` is the first sentence of a story.\n", + "\n", + "* Answer end constraints: this constraint was imposed by the way the answer should end. \n", + "> Base template, ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending with: {ending}\\n{story}\n", + "\n", + "where `ending` is the last sentence of a story.\n", + "\n", + "* Answer middle constraints: this constraint was imposed by the way the answer should have in its middle text. 
\n", + "> Base template, where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\n", + "\n", + "where `middle` is a summary, produced by a generative model, of the story without its first and last sentences\n", + "\n", + "To get the samples we used the following pipeline:\n", + "\n", + "* **Get data**: download from kaggle\n", + "* **Pre-processing**: load the source/target (aka: prompt/story) files of every split (train/valid/test), merge them into one pandas dataframe, and enrich it with tabular info about the sample tags.\n", + "* **Triage prompts**: we picked prompts sorted by frequency and built regex patterns for some of them to extract a stripped prompt and the related constraint.\n", + "* **Split stories**: after removing the beginning and ending sentences of each story, we applied a sentence sliding window to get the stories' middle summaries." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get data from Kaggle\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# helper functions\n", + "import json\n", + "\n", + "\n", + "def save_credentials(d):\n", + " with open(\"/root/.kaggle/kaggle.json\", \"w\") as outfile:\n", + " json.dump(d, outfile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mv: cannot stat '/mnt/home/fabraz/kaggle.json': No such file or directory\n" + ] + } + ], + "source": [ + "# uncomment the following instructions, in case you want to save a .kaggle.json\n", + "# d = {}\n", + "# d['username'] = 'user'\n", + "# d['key'] = 'key'\n", + "#!mkdir ~/.kaggle\n", + "# save_credentials(d)\n", + "!mv ~/kaggle.json ~/.kaggle/\n", + "!chmod 600 ~/.kaggle/kaggle.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "#!pip install kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/bin/bash: kaggle: command not found\n" + ] + } + ], + "source": [ + "!kaggle datasets download -d ratthachat/writing-prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: writing-prompts.zip\n", + " inflating: writingPrompts/README \n", + " inflating: writingPrompts/test.wp_source \n", + " inflating: writingPrompts/test.wp_target \n", + " inflating: writingPrompts/train.wp_source \n", + " inflating: writingPrompts/train.wp_target \n", + " inflating: writingPrompts/valid.wp_source \n", + " inflating: writingPrompts/valid.wp_target \n" + ] + } + ], + "source": [ + "!unzip writing-prompts.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre-processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from IPython.display import display, HTML" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# helper functions\n", + "import re\n", + "\n", + "\n", + "def load_file(path, names):\n", + " with open(path, \"r\") as f:\n", + " lines = f.readlines()\n", + " return pd.DataFrame(lines, columns=names)\n", + "\n", + "\n", + "def load_data():\n", + " tags = {\n", + " \"WP\": \"Writing Prompt\",\n", + " \"SP\": \"Simple Prompt\",\n", + " \"EU\": \"Established Universe\",\n", + " \"CW\": \"Constrained Writing\",\n", + " \"TT\": \"Theme Thursday\",\n", + " \"PM\": \"Prompt Me\",\n", + " \"MP\": \"Media Prompt\",\n", + " \"IP\": \"Image Prompt\",\n", + " \"PI\": \"Prompt Inspired\",\n", + " \"OT\": \"Off Topic\",\n", + " \"RF\": \"Reality Fiction\",\n", + " 
}\n", + "\n", + " dfConcat = pd.DataFrame()\n", + " for split in [\"train\", \"valid\", \"test\"]:\n", + " df = load_file(f\"writingPrompts/{split}.wp_source\", [\"prompt\"])\n", + " for tag in tags.keys():\n", + " df[tag.lower()] = df[\"prompt\"].map(lambda x: check_tag(x, tag.lower()))\n", + " df[\"tagCounter\"] = df.iloc[:, [2, -1]].sum(axis=1)\n", + " df[\"splitLineIndex\"] = df.index\n", + " story = load_file(f\"writingPrompts/{split}.wp_target\", [\"story\"])\n", + " df[\"story\"] = story[\"story\"]\n", + " df[\"split\"] = split\n", + " dfConcat = pd.concat([dfConcat, df])\n", + " return dfConcat\n", + "\n", + "\n", + "def check_tag(item, tag):\n", + " r = re.compile(r\"[\\(\\{\\[]\\s*[\\w]{2}\\s*[\\]\\}\\)]\\s*\")\n", + " m = r.findall(item.lower())\n", + " if len(m) > 0:\n", + " for group in m:\n", + " if tag in group:\n", + " return 1\n", + " return 0\n", + "\n", + "\n", + "def show_data(df):\n", + " html_string = \"\"\"\n", + " \n", + " HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " {table}\n", + " \n", + " .\n", + " \"\"\"\n", + " df = df.replace(\"\\|\\< newline \\>|\\\", \"\\n\", regex=True)\n", + " df.style.set_properties(**{\"text-align\": \"left\"}).set_table_styles(\n", + " [dict(selector=\"th\", props=[(\"text-align\", \"left\")])]\n", + " )\n", + " html = df.to_html()\n", + " html_string = html_string.format(table=html)\n", + " html_string = (\n", + " html_string.replace(r\"\\n\", \"
\")\n", + " .replace(\"\", '')\n", + " .replace(\"\", '')\n", + " )\n", + " display(HTML(html_string))\n", + "\n", + "\n", + "def get_samples(df, n, constraint=None, show=True):\n", + " samples = zip(df[\"prompt\"].iloc[:n, 0].index, df[\"prompt\"].iloc[:n, 0], df[\"story\"].iloc[:n, 0])\n", + " df = pd.DataFrame(samples, columns=[\"index\", \"prompt\", \"story\"])\n", + " if constraint is not None:\n", + " df = df[df[\"prompt\"].str.contains(constraint)]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ WP ] Leonardo DiCaprio in a fit of rage begins to torpedo his own career by deliberately acting poorly and taking on bad films . He finally wins an oscar for starring in Paul Blart : Mall Cop 3 .\n", + "[ CW ] Kill the writer in first-person narrative .\n" + ] + } + ], + "source": [ + "!head -n2 writingPrompts/test.wp_source" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
promptwpspeucwttpmmpippiotrftagCountersplitLineIndexstorysplit
0[ WP ] You 've finally managed to discover the...1000000000000So many times have I walked on ruins , the rem...train
1[ WP ] The moon is actually a giant egg , and ...1000000000001-Week 18 aboard the Depth Reaver , Circa 2023-...train
2[ WP ] You find a rip in time walking through ...1000000000002I was feckin ' sloshed , mate . First time I e...train
\n", + "
" + ], + "text/plain": [ + " prompt wp sp eu cw tt pm \\\n", + "0 [ WP ] You 've finally managed to discover the... 1 0 0 0 0 0 \n", + "1 [ WP ] The moon is actually a giant egg , and ... 1 0 0 0 0 0 \n", + "2 [ WP ] You find a rip in time walking through ... 1 0 0 0 0 0 \n", + "\n", + " mp ip pi ot rf tagCounter splitLineIndex \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 1 \n", + "2 0 0 0 0 0 0 2 \n", + "\n", + " story split \n", + "0 So many times have I walked on ruins , the rem... train \n", + "1 -Week 18 aboard the Depth Reaver , Circa 2023-... train \n", + "2 I was feckin ' sloshed , mate . First time I e... train " + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(303358, 16)\n" + ] + } + ], + "source": [ + "print(ds.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['splitLineIndex', 'prompt', 'story', 'split'], dtype='object')" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[ds[\"split\"] == \"test\"].iloc[:2, [13, 0, 14, -1]].columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] You 've finally managed to discover the secret to immortality . Suddenly , Death appears before you , hands you a business card , and says , `` When you realize living forever sucks , call this number , I 've got a job offer for you . ''
So many times have I walked on ruins , the remainings of places that I loved and got used to.. At first I was scared , each time I could feel my city , my current generation collapse , break into the black hole that thrives within it , I could feel humanity , the way I 'm able to feel my body.. After a few hundred years , the pattern became obvious , no longer the war and damage that would devastate me over and over again in the far past was effecting me so dominantly .
It 's funny , but I felt as if after gaining what I desired so long , what I have lived for my entire life , only then , when I achieved immortality I started truly aging .

5 world wars have passed , and now they feel like a simple sickeness that would pass by every so often , I could no longer evaluate the individual human as a being of its own , the importance of mortals is merely the same as the importance of my skin cells ; They are a part of a mechanism so much more advanced , a mechanism that is so dear to my fallen heart a mechanism that I have seen fall and rise so many times , a mechanism that when lost all of which it had , had me loosing my will to live , for the first time in all of my thousands years of existence .

Acceptance , something so important . a skill that has proved itself worthy dozens of times , an ability that looks so easy to achieve , a gift , that I was n't able to aquire in all my years , until now . When the ashes on the ground flew into the now empty air upon humanity 's fall , I felt as if all of it 's weight was crushing me . Ignorance took over and I searched years for a hope , a sign of the very same patterns that I used to watch reappear every hundred years , the very core of my will to exist that was now no more that I so strongly wish was .

If you have ever wondered if silence can drive people crazy , it can..
I ca n't feel my legs , I have walked for days , just to hear the sound of gravel , crushed bones , crushed buildings and crushed civilizations under my steps to keep my sanity.. until I remembered , the day in my far past . The day of my rebirth , I took out of my pocket a small plastic box , with nine buttons and a small glass window . I could n't believe this was our past , I could n't believe how far we have been able to progress and yet , be destroyed by our own violence .
I slowly dialed the number I was given , exactly 1729 years ago .

I dropped a tear , a tear that was too slow to hit the ground as I got sucked into the darkness that emerged around me .

A chill went through my spine as I saw my destiny rise above me , I could see the white teeth under the dark cloack ...

`` You have finally arrived '' He projected into my mind , with the most chilling cold and unhuman voice .

`` I 'm ready to obey '' I answered . I knew who was sitting infront of me , and it was time for me to obey him , after all these years of playing god , even I came to it .

Funny is n't it ? Even by achieving immortality , death , is inescapable .
train
11[ WP ] The moon is actually a giant egg , and it has just started to hatch .
-Week 18 aboard the Depth Reaver , Circa 2023-

I walk about the dull gray halls , the artificial gravity making my steps feel almost as if they were on land . Almost . I glance out a window as I pass it by . There 's the sun , and there 's the moon right there . And , of course , there 's the Earth . I kinda miss it . Then again , space is pretty cool . It 's got some brilliant views , and the wifi is surprisingly good . Even countless miles away from the Earth , I can crush Silver noobs on CS GO .

I pass by Dale Malkowitz , the head scientist on board .

`` Evening , Dale , '' I say .

`` What up , Danny ? '' he replies cordially .

`` Nothin ' much . A little bored , I guess . ''

He shakes his head in disbelief . `` I really , *really* do n't understand how you can be bored in space . ''

`` Well hey , '' I say slightly defensively , `` Aside from the views , it 's kinda ... dull . And empty . And stuff . ''

`` Whatever you say , Wittell , '' he says , not unkindly . Then he walks off . A few moments pass , and then I decide to look out the window right by me . As my eyes scan the inky blackness of space ( again ) , I notice something odd about the moon 's surface . It 's slightly ... cracked .

`` Hey , Malkowitz ? '' I call out , `` You might wan na check this out ! ''

He walks over to me casually , probably expecting nothing . `` What ? '' he asks , `` What do you see ? ''

I point at the moon . His brow furrows . `` Huh ... I guess there 's something up with the surface . I 'll have to look into tha- ''

Suddenly , the surface cracks a little more . We glance at each other , and then back at the moon , and then at each other again , and then back at the moon again .

`` What 's going on ? '' I ask , alarmed .

He 's silent for a minute or two , mouth hanging open . Then , he calls out : `` Janice ! Terry ! Johnny ! Get over here ! Something 's up with the moon . ''

The other crewmates enter , unsure of what to expect . As their eyes lay upon the moon 's surface cracks , they widen .

And , by coincidence , more cracks appear at that very moment . And then more . And more . And more . And more ...

Little bits of the moon begin to float away , torn free of the rest of the surface . We all stare , speechless . And then ... it happens . It *happens* .

The side of the moon facing us is ... torn away by a ...

Human ... hand ?

And we see ...

A giant ... human face ? !

Surprisingly , I can hear my thoughts over my racing heart . *I ca n't help but feel as if I recognize that face ... from the ... *

*Internet . *

Suddenly , the great face 's lips move .

Of course , none of us can actually *hear* it speak , because of the laws of space and whatnot . However , I can read its lips , and it appears to be saying :

`` Are you sure about that ? ''
train
\n", + " \n", + " .\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"train\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Valid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] Every person in the world undergoes a `` goodness '' test . It 's designed to give a score from 1 to 200 , where 1 is pure evil , and 200 is an angel in human body . Then the world is divided into 200 zones , where people can live among their own kind .
Clancy Marguerian , 154 , private first class of the 150+ army , sits in his foxhole . Tired cold , wet and hungry , the only thing preventing him from laying down his rifle and walking towards the enemy lines in surrender is the knowledge that however bad he has it here , life as a 50-100 POW is surely much worse . He 's fighting to keep his eyes open and his rifle ready when the mortar shells start landing near him .

He hunkers lower .

After a few minutes under the barrage , Marguerian hears hurried footsteps , a grunt , and a thud as a soldier leaps into the foxhole . The man 's uniform is tan , he must be a 50-100 .

The two men snarl and grab at eachother , grappling in the small foxhole . Abruptly , their faces come together .

`` Clancy ? ''

`` Rob ? ''

Rob Hall , 97 , Corporal in the 50-100 army grins , as the situation turns from life or death struggle , to a meeting of two college friends . He lets go of Marguerian 's collar .

`` Holy shit Clancy , you 're the last person I expected to see here ''

`` Yeah ''

`` Shit man , I did n't think I 'd ever see 'Mr . volunteers every saturday morning at the food shelf ' , not after The Reorganization at least ''

`` Yeah Rob , it is something is n't it ''

`` Man , I 'm sorry I tried to kill you there , hey , I heard you guys were out of food , here , you can share my dinner ''

Clancy marvels , even after all this : The Reorganization , the coalitions , the war , Rob is still his old , chatty self .

The two men sit , Rob chatting away , Clancy forcing out pleasantries . They pass Rob 's rations between them .


`` Clancy my man , I heard a group of terrorist 5 's took have formed some kind of cult , and they 're rallying all the < 50 in their own coalition ''

`` Oh yeah ? ''

`` Yeah , I mean , that sucks and everything , cause those are some scary dudes , but I heard that there 's going to be a truce between our countries in a few days , why do n't we just hang out here , pretty soon we wo n't even be enemies anymore ! ''

`` Yeah , Rob , that sounds like a plan ''

`` Man , I 'm so glad I found you again , in a few days , this war will be over , and things will be cool between us and , hey , remember Sarah ? I heard she 's a 151 , maybe I 'll look her up , I 'll be sure to visit you too once I can get a pass to sector 150-155 , it 'll probably be tough though , even before the war , you had to do sooo much paperwork to be allowed to visit , I wonder if passes will even be reinstated after the truce ends , hey , did I ever tell you about the time ... ''

Rob babbles as he dozes off , grinning up at Clancy .

When Clancy is sure that his friend is asleep , he slits Rob 's throat with his bayonet . Clancy climbs out of the foxhole , and stumbles his way back to battalion HQ .
valid
11[ WP ] Space mining is on the rise . The Space tanker Exxon Valdez 2.0 crash and spill its cargo . Write a news story covering the event .
„… and the little duckling will never be able to walk again. ”

The artificial intelligence paused a moment for dramatic effect before continuing with its broadcast with a different voice .

“ What a hearth breaking story , Frank . But now to another story that may leave you feel equally dirty . The automated space tanker Exxon Valdez 2.0 collided with an asteroid on its way to the Jupiter moon Ganymede . According to the ship owner the ship is out of control and leaking its content into space. ”

“ That ’ s right , Fred . And the content of the ship has it in it , as they say ” , the computer said in first voice again , “ The whole tanker was filled with ‘ biological waste products ’ coming from research and mining stations in the Kuiper Belt. ”

“ Biological waste products ? You don ’ t mean ... ”

“ Yes , Fred ! ” Dramatic pause . “ I am talking about poop . Lots of it . And apparently it ’ s spilling everywhere. ”

“ Better call the plumbers , Frank. ”

“ Not any time soon , Fred . A spokesperson of the ship owner stated and I quote – ‘ Space is kind of big and empty , we expect no one to care , so why should we ? ’ Apparently they will just build a new ship and be done with it. ”

“ That ’ s one way not to deal with the problem . But why doesn ’ t the ship fly home ? Shouldn ’ t the AI on board be able to handle such a problem ? ”

“ Well , the issue is that the part in charge to deal with asteroid impacts like that has been impacted by the asteroid. ”

“ Ouch . Talk about a bad run. ”

“ True , especially if you take the name of the ship in consideration. ”

“ Oh ? Exxon Valdez 2.0 it was , isn ’ t that right , Frank ? ”

“ You ’ re absolutely right , Fred . Did you know the ship was named after an infamous ship of the twentieth century back on old Earth ? Apparently the Exxon Valdez of old was used for transporting petroleum across the oceans of Earth . Petroleum , as some of our listeners might not know , was a brownish black , gooey liquid comprised of biological matter which was transformed under high pressure for millions of years . Quite ironically the Exxon Valdez was infamous for crashing and spilling its cargo. ”

“ Well , talk about making a bad name for yourself . Now both ships will go down in history for spilling black gooey stuff where it doesn ’ t belong . Who had that bright idea for such a name anyway ? ”

“ Well , Fred , the company made its first plunder by holding a naming contest on the internet. ”

“ Oh , will they ever learn ? ”

“ Apparently not , Fred . Predictably someone tried to make a joke out of it . A niche side of history role players got wind of the contest and made it its goal to get it named after the infamous Exxon Valdez . Apparently they thought it would be funny , and given the content both ships were ferrying around , they might have a point. ”

“ Funny , indeed , Frank . What ’ s the name of the side ? ”

“ Well , Fred , it ’ s called Reddit . The people there mostly talk in outdated lingo and memes and watch cat pictures back from a time when the internet only was local on Earth. ”

“ Truly a herald of the dark ages. ”

“ You might be right about that , Fred . I assume they just thought it was funny . I guess this happens , when you let the internet decide on things. ”

“ Well , Frank , when you think about the content both ships were ferrying around , they might have been right . Embarrassing for the company , but funny for everyone else. ”

“ It might get worse than that , Fred . Environmentalists are up in arms . They claim that the human waste products spilling out of the ship might collide with Jupiter ’ s moon Europa within the next few millennia and might contaminate the biospheres with Earth life . Apparently there are a lot of bacteria and the likes in poop and some might be able survive the harsh conditions of space and end up impacting on the restricted moon. ”

“ Oh dear , Frank , does the Monolith know about it yet ? I am sure it won ’ t let us hear the end of it. ”

valid
\n", + " \n", + " .\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"valid\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] Leonardo DiCaprio in a fit of rage begins to torpedo his own career by deliberately acting poorly and taking on bad films . He finally wins an oscar for starring in Paul Blart : Mall Cop 3 .
The wet marble floor pressed on his cheek like a thousand hands slapping his face frozen in time . Smattering piss of rain ignored his indignant mumblings . His eyes fluttered . Pins and needs ran from finger to shoulder as he pushed back against the floor , contorting his aching body into a cross legged position . Last night was bad . He gathered that . His routine dullness of though crept inwards from the edges of his mind toward the black mist that veiled his most recent memories . He struggled to recall whatever he could n't recall but only for a moment before he decided it probably was n't worth the effort .
He glanced around the room for a few minutes before concluding that he probably did n't know where he was . His investigation was n't entirely fruitless , he discovered a mostly full bottle of vodka . It was cheap but would definitely get the job done . Taking a few swigs made it childishly easy to ignore that gigantic black cloud of fog blotting out whatever the hell he did before he woke up .
There was a mirror in the room and for want of anything more interesting to study he gazed at himself . It was a game he 'd play with himself , glancing at the mirror and seeing if he could recognize the person looking back . If he did n't know better he 'd have guessed he was a very successful mattress salesman , or perhaps a bum who had managed to score some luck gambling .
His face was portly and unshaven , in that limbo place where it had been too many days without being clean and too few days to become a beard . His stomach was round but firm , like a basketball stuffed under a shirt and then semi deflated . The hair was long and unruly , receding far into the past . But his eyes were the giveaway . Looking closely enough at them he could still see an intensity . It was n't the sharp kind he carried in his youth but rather like a rusted dagger . Still sharp enough to cut .
`` DiCaprio . '' The curse rasped out of him in a choke . After all these years spent working on the hallmark channel and tv series based on mediocre movies he was still there . Despite his best efforts to bury himself under all of the alchol and drugs he was still in there . He thought for sure after the bankruptcy he 'd be done , but no that god damned rerelease of Titanic the royalties started pouring in and he could n't get rid of the money . Not even the live action version of the nut job could destroy him .
Cursing he hurled the bottle at the mirror but his wet hands slipped and instead of a shattering crash there was only a thud as the bottle bounced off the dry wall and rolled on the floor .
His rage thwarted by his impotence he slumped against the floor and finally noticed why there was rain coming into this room .

The window was smashed . He looked at the bottle , confused . No , he had n't done that . At least not with the vodka . He looked back at the glass etched around the window sill and his eyes hung on the red that stained the jagged teeth .

The headache crept back towards the front of his mind while the bloody glass pinned his eyes in place . What the fuck happened last night ?
test
11[ CW ] Kill the writer in first-person narrative .
It 's been three days since my boyfriend pissed off the neighbors .

They had to be pissed , he called the police on them . The neighbors had been harboring a runaway criminal . We did n't live in a bad neighborhood , there were families and good people living here with solid steady jobs . They cared about their yards and such . But , there was a bad egg , our neighbors to the south of us were shady . We could hear them yelling at their dog many times a week . Strange smoke often came out of their house , and the lights in the garage were on at odd hours . We never had proof until now that our concerns are legitimate .

The car the escaped criminal was driving had been parked at the neighbor 's house and my boyfriend decided he should turn them in . This lead to the police parking in front of *our* house , and watching them through our bedroom window for hours until they caught him . They had to know it was us . And it freaked me out .

I had started tucking my pink taser in my jacket pocket when I took my miniature Yorkie out to go potty . My neighbor to the north , Jay , seemed to notice my tension , so when he saw me step outside , he 'd come out and chat with me . He 'd ask me about work , and talk to me about his latest construction jobs . Jay always pretend to be grabbing something out of his massive pick-up truck . It usually followed the same pattern - he grabs something out of his truck , sees me out with my dog , then starts in on how it baffles him how such a tiny dog was smarter than most of the people he worked with . We 'd both gripe about our jobs and laugh about stupid customers , chase the puppy down when she tried to go after squirrels , and then part ways until the next potty break .

The sun was beginning to set when my dog started doing her potty dance by the door . I put on my jacket , slipped my taser in my pocket , and opened the door . She bolted out the door and went straight for the squirrel sniffing around the sidewalk .

`` NO ! BAD GIRL , COME HERE ! '' The squirrel started running across the road and her tiny legs skittered out of it . I ran after her , swearing as I tripped over a crack in the road . I felt a snap in my ankle and I went down .

The roar of a large pick-up engine was too close and I did n't know what to look at - my little dog bouncing across the neighbor 's lawn , or the tires that were n't slowing down fast enough . I chose neither and closed my eyes .

The last thing I heard was the clatter of of work boots and Jay voice cracking , `` Oh god , oh god , oh god ... ''


test
\n", + " \n", + " .\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"test\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Augmentation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Triage Prompts\n", + "\n", + "1. Take the prompts list order by frequency\n", + "2. Define regex patterns for prompt and constraint\n", + "3. Generate prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_rep = ds.groupby([\"prompt\", \"split\"]).size().reset_index().rename(columns={0: \"records\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_rep = df_rep[df_rep[\"records\"] > 20].sort_values([\"records\"], ascending=False)\n", + "# _str = df_rep[df_rep['records']>20].sort_values(['records'], ascending=False).iloc[1,0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# df_rep[df_rep[\"split\"] == \"valid\"].iloc[1:3, 0]\n", + "# topPrompts20Reps += df_rep[df_rep[\"split\"] == \"valid\"].iloc[1:3, 0].to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[\"[ WP ] To get in Heaven , you have to confront the person who you hurt the most . You were expecting an ex , your parents/relatives , or a friend . You did n't expect to see yourself .\\n\",\n", + " \"[ WP ] You are born without emotions ; to compensate this , you started a donation box where people could donate their unwanted emotions . 
You 've lived a life filled with sadness , fear and regret until one day , someone donates happiness .\\n\",\n", + " \"[ WP ] You are a teenager with the ability to measure how `` Dangerous '' people are on a scale from 1 to 10 just by looking at them . A normal child would be a 1 , while a trained man with an assault rifle might be a 7 . Today , you notice the unassuming new kid at school measures a 10 .\\n\",\n", + " '[ WP ] You live in a world where every person receives a superpower on their 18th birthday . You eagerly count down the seconds then shriek in horror as you are given a power no one would ever want to be stuck with .\\n']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topPrompts20Reps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "topPrompts20Reps = df_rep[df_rep[\"records\"] > 20].sort_values([\"records\"], ascending=False)[\"prompt\"].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We found 1016 prompts having more than 20 stories\n" + ] + } + ], + "source": [ + "print(f\"We found {len(topPrompts20Reps)} prompts having more than 20 stories\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROMPT_PATTERNS = \"(Lucifer\\snever[\\s\\w,]+)|\\\n", + "([\\. \\w,]+)\\.\\s+Tell me|\\\n", + "(All injuries[\\. \\w,]+)\\.|\\\n", + "(? 
0:\n", + " return m.group(0)\n", + " return None\n", + "\n", + "\n", + "from spacy.lang.en import English\n", + "\n", + "\n", + "def get_sentences(_str):\n", + " chunks = _str.split(\"\\n\")\n", + " sentences = []\n", + " nlp = English()\n", + " nlp.add_pipe(\"sentencizer\")\n", + " for chunk in chunks:\n", + " doc = nlp(chunk)\n", + " sentences += [sent.text.strip() for sent in doc.sents]\n", + " return sentences\n", + "\n", + "\n", + "from itertools import islice\n", + "\n", + "\n", + "def window(seq, n=2):\n", + " it = iter(seq)\n", + " result = tuple(islice(it, n))\n", + " if len(result) == n:\n", + " yield \" \".join(result)\n", + " for elem in it:\n", + " result = result[1:] + (elem,)\n", + " yield \" \".join(result)\n", + "\n", + "\n", + "def extract_story_parts(story):\n", + " sentences = get_sentences(story)\n", + " beginning = sentences.pop(0)\n", + " middles = window(sentences, 4)\n", + " ending = sentences.pop(-1)\n", + " return beginning, middles, ending\n", + "\n", + "\n", + "def clear_prompt(prompt):\n", + " return re.sub(r\"^[Ww]rite \", \"\", prompt)\n", + "\n", + "\n", + "def get_sample_dict(split, id, text):\n", + " return {\"split\": split, \"splitLineIndex\": id, \"text\": text}\n", + "\n", + "\n", + "def generate_instruction_diologs(df):\n", + " dialogs = []\n", + " \"\"\"User: What is this story about: {story} -> Rosey: I think it's about: {striped_prompt}\"\"\"\n", + " dialogBase = \"\"\"User: write me a story about: {stripped_prompt}\"\"\"\n", + " dialog1 = \"\"\" -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\"\"\"\n", + " dialog2 = \"\"\", {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\"\"\"\n", + " dialog3 = \"\"\", starting with: {beggining} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: {beggining}:\\n{story}\"\"\"\n", + " dialog4 = \"\"\", ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending 
with: {ending}\\n{story}\"\"\"\n", + " dialog5 = \"\"\", where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\"\"\"\n", + "\n", + " df_rep = df.groupby([\"prompt\"]).size().reset_index().rename(columns={0: \"records\"})\n", + " df_rep.sort_values([\"records\"], ascending=False, inplace=True)\n", + " pbar = tqdm()\n", + " pbar.reset(total=len(df_rep))\n", + " for prompt in df_rep.iloc[:, 0]:\n", + " strippedPrompt = extract_prompt_parts(prompt, PROMPT_PATTERNS)\n", + " if strippedPrompt is None:\n", + " continue\n", + " strippedPrompt = clear_prompt(strippedPrompt)\n", + " strippedConstraint = extract_prompt_parts(prompt, CONST_PATTERNS)\n", + "\n", + " for row in df[df[\"prompt\"] == prompt].itertuples():\n", + " try:\n", + " story = (\n", + " row.story.replace(\"\", \"\\n\")\n", + " .replace(\"< newline >\", \"\\n\")\n", + " .replace(\"\", \"\\n\")\n", + " .strip()\n", + " )\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " dialogBeg = dialogBase.format(stripped_prompt=strippedPrompt)\n", + " dialog = dialogBeg + dialog1.format(story=story, stripped_prompt=strippedPrompt)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " if strippedConstraint is not None:\n", + " dialog = dialogBeg + dialog2.format(\n", + " stripped_prompt=strippedPrompt, stripped_constraint=strippedConstraint, story=story\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " dialog = dialogBeg + dialog3.format(stripped_prompt=strippedPrompt, story=story, beggining=beginning)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " dialog = dialogBeg + dialog4.format(stripped_prompt=strippedPrompt, story=story, ending=ending)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " middlesSumarizered = summarizer(middles, **params)\n", + " for 
middle, sumarizedMiddle in zip(middles, middlesSumarizered):\n", + " # dialogs.append(dialogBeg + dialog5.format(stripped_prompt=strippedPrompt, story=story, middle=middle))\n", + " dialog = dialogBeg + dialog5.format(\n", + " stripped_prompt=strippedPrompt, story=story, middle=sumarizedMiddle[0][\"summary_text\"]\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " pbar.update()\n", + " except Exception as e:\n", + " print(f\"{row.split}/{row.splitIndex}\")\n", + " raise e\n", + " pbar.refresh()\n", + " return dialogs\n", + "\n", + "\n", + "def filter_data(\n", + " dataset,\n", + " negativeTagFilter=None,\n", + " positiveTagFilter=None,\n", + " patternFilter=None,\n", + "):\n", + " \"\"\"\n", + " > filter_data(dataset['train'],negativeTagFilter=['ip'], positiveTagFilter=['pm'] )\n", + " \"\"\"\n", + " prompt = dataset[\"prompt\"]\n", + " if negativeTagFilter is not None:\n", + " prompt = prompt[(prompt[negativeTagFilter] < 1).any(axis=1)]\n", + " if positiveTagFilter is not None:\n", + " prompt = prompt[prompt[positiveTagFilter].gt(0).all(axis=1)]\n", + " if patternFilter is not None:\n", + " prompt = prompt[prompt[\"prompt\"].str.contains(patternFilter)]\n", + " story = dataset[\"story\"]\n", + " story = story.iloc[prompt.index]\n", + " return {\"prompt\": prompt, \"story\": story}\n", + "\n", + "\n", + "def generate_instruction_diologs(prompt, df):\n", + " dialogs = []\n", + " \"\"\"User: What is this story about: {story} -> Rosey: I think it's about: {striped_prompt}\"\"\"\n", + " dialogBase = \"\"\"User: write me a story about: {stripped_prompt}\"\"\"\n", + " dialog1 = \"\"\" -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\"\"\"\n", + " dialog2 = \"\"\", {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\"\"\"\n", + " dialog3 = \"\"\", starting with: {beggining} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: 
{beggining}:\\n{story}\"\"\"\n", + " dialog4 = \"\"\", ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending with: {ending}\\n{story}\"\"\"\n", + " dialog5 = \"\"\", where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\"\"\"\n", + "\n", + " strippedPrompt = extract_prompt_parts(prompt, PROMPT_PATTERNS)\n", + " if strippedPrompt is not None:\n", + " strippedPrompt = clear_prompt(strippedPrompt)\n", + " strippedConstraint = extract_prompt_parts(prompt, CONST_PATTERNS)\n", + " pbar = tqdm(ascii=True, desc=\"stories\")\n", + " pbar.reset(total=len(df[df[\"prompt\"] == prompt]))\n", + " for row in df[df[\"prompt\"] == prompt].itertuples():\n", + " try:\n", + " story = (\n", + " row.story.replace(\"\", \"\\n\")\n", + " .replace(\"< newline >\", \"\\n\")\n", + " .replace(\"\", \"\\n\")\n", + " .strip()\n", + " )\n", + " dialogBeg = dialogBase.format(stripped_prompt=strippedPrompt)\n", + " dialog = dialogBeg + dialog1.format(story=story, stripped_prompt=strippedPrompt)\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " if strippedConstraint is not None:\n", + " dialog = dialogBeg + dialog2.format(\n", + " stripped_prompt=strippedPrompt, stripped_constraint=strippedConstraint, story=story\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " if beginning is not None:\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " dialog = dialogBeg + dialog3.format(\n", + " stripped_prompt=strippedPrompt, story=story, beggining=beginning\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " dialog = dialogBeg + dialog4.format(stripped_prompt=strippedPrompt, story=story, ending=ending)\n", + " dialogs.append(get_sample_dict(row.split, 
row.splitLineIndex, dialog))\n", + " middlesSumarizered = summarizer(middles, **params)\n", + " for middle, sumarizedMiddle in zip(middles, middlesSumarizered):\n", + " # dialogs.append(dialogBeg + dialog5.format(stripped_prompt=strippedPrompt, story=story, middle=middle))\n", + " dialog = dialogBeg + dialog5.format(\n", + " stripped_prompt=strippedPrompt, story=story, middle=sumarizedMiddle[0][\"summary_text\"]\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " pbar.update()\n", + " except Exception as e:\n", + " print(f\"{row.split}/{row.splitLineIndex}\")\n", + " raise e\n", + " pbar.refresh()\n", + " return dialogs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It saves parquet every `step` samples to avoid losing work. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## filter dataset to take only prompts with frequency greater than 20 stories.\n", + "dialogs = []\n", + "i = 0\n", + "start = 0\n", + "step = 10\n", + "for index in range(start, len(topPrompts20Reps), step):\n", + " pbar = tqdm(ascii=True, desc=\"prompt\")\n", + " pbar.reset(total=len(topPrompts20Reps[index : index + step]))\n", + " for prompt in topPrompts20Reps[index : index + step]:\n", + " tmpDialogs = generate_instruction_diologs(prompt, ds)\n", + " if tmpDialogs is not None:\n", + " dialogs += tmpDialogs\n", + " pbar.update()\n", + " if len(dialogs) > 0:\n", + " pd.DataFrame(dialogs).to_parquet(\"writing-prompts-aug.parquet\")\n", + " pbar.refresh()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet(\"writing-prompts-aug.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for split in 
list(set(df.split)):\n", + " df_aux = df[df[\"split\"] == split].iloc[:, 1:]\n", + " df_aux.reset_index(inplace=True)\n", + " df_aux.iloc[:, 1:].to_parquet(f\"{split}.parquet\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "01073391c27d455898ddec5e5b613840": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "02aff4fac4454967b80469f0774e1a6c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", 
+ "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03209aedabd94b9f97c7ff186d61a1b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03c75c2c3a674154aa1370081c8d2d0c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "04eee7ef7947484c9a2fb9bb6ff14eec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": 
null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "04f0d4dafcee402780ab34cfba03179e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7390362a9704413984a47a1d5b262276", + "IPY_MODEL_b263a25a96f547218983b9e62f2b841c", + "IPY_MODEL_f8ac6fc3cf284b50bb54c6ade26db5a1" + ], + "layout": "IPY_MODEL_aca1b6be80124fd0999892577aee9f1e" + } + }, + "05cf82d369674d848d9d2dd50be546ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_949e1ca0688f4df39c6f0aee139a8a4b", + "placeholder": "​", + "style": "IPY_MODEL_666605f8ef614cc5806b7e2076095746", + "value": " 27%" + } + }, + "09ad8cfb26814f979a82ac73f073d5c2": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_903c2a0ea90043d5ab9c6812ee118c1a", + "IPY_MODEL_664e94791b1946e1a78bfa93e9ce0b6f", + "IPY_MODEL_322330e98fc745df9b55a959392c015c" + ], + "layout": "IPY_MODEL_02aff4fac4454967b80469f0774e1a6c" + } + }, + "0ae446f572cd4bc5b6ac64e5f1aff216": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0bba8f8e7f754d1eb204db2ceab4aaab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0d209a94698d43748bccb06629b1c97a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14f3ee8a6fa943178e949c45baa7683f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "161a4ed9fcd04fee984704a6666f5399": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1924812f3b644648ae3671cb1f8f659f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1a774659596145c48dfd1703664ffbaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c9c8492343e4a86b3977b41abf2c91c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "20beb9b7ad504afba558ed28b6fb242b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_9a369f1da9f94552960bb42bc895fb4a", + "IPY_MODEL_4f372c13f77245c49925981c33d1d611", + "IPY_MODEL_d0381de0ca3a4359a0d2c393e9f64f69" + ], + "layout": "IPY_MODEL_c50b53014ab44ef4b196b1a79c1ad61c" + } + }, + "2102cf1d8f6b4192b6d45dfbe4e5044d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "227455685ef746a4845020529c86aca2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "28085d8a3b4341e5bac2ce7efd9d89d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_92fb3795816548ffb336749cf590d335", + "max": 2422362, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3402b3a652254e90b3d6ef17dccfe90a", + "value": 2422362 + } + }, + "2e3b3d799b5b461d91fb4b2fa64ea7be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "322330e98fc745df9b55a959392c015c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c9fbbe9addd4d4a82a0e7f2a9410af2", + "placeholder": "​", + "style": "IPY_MODEL_c4b3a987b2eb4d81a209fe62f1f00459", + "value": " 792k/792k [00:00<00:00, 10.6MB/s]" + } + }, + "33fc1d4498574c1e86e7c336ab3c4a9d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3402b3a652254e90b3d6ef17dccfe90a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3612d9a6e93348d6b7b98ca7d611eec4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1924812f3b644648ae3671cb1f8f659f", + "placeholder": "​", + "style": "IPY_MODEL_0bba8f8e7f754d1eb204db2ceab4aaab", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "3dce9197ad544ff2be48248640298d38": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "43b109811c7d42089713ad5c327afc9d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"43f709c83c424926b92e36acc3c95e1a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "473e0749bada493b90253b7c0a816e59": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4b9d6ee49ebd4c018d01f8a64fb112e1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_05cf82d369674d848d9d2dd50be546ad", + "IPY_MODEL_ce7a65dfe8a04e29b8512044fe994b87", + "IPY_MODEL_e639d6f2dafd4897a9a5df658cdf68b0" + ], + "layout": "IPY_MODEL_0d209a94698d43748bccb06629b1c97a" + } + }, + "4cfd63abaee74a1babed15ecc1ee834a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f372c13f77245c49925981c33d1d611": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b28758bfe16428ca933c9100b7a8b29", + "max": 2361, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_983d2b1c0515441db135aae6dd217c41", + "value": 2361 + } + }, + "5d2a8c459bfc4e24be21ceef1ec86ae0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "62113a2cac0d499b9acf2a89f1993f9a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "664e94791b1946e1a78bfa93e9ce0b6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2102cf1d8f6b4192b6d45dfbe4e5044d", + "max": 791656, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d07b4780b79340c8950e3f12c4d70820", + "value": 791656 + } + }, + "666605f8ef614cc5806b7e2076095746": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7390362a9704413984a47a1d5b262276": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3f48d9f38b8419aae37d33b4968c2f5", + "placeholder": "​", + "style": "IPY_MODEL_814a8be16bbd4c499b23e931155c6169", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + 
"78188eb50348434e92dc947f6baae899": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e33fd695d0af44dcb068cd168190ec03", + "max": 1125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cf90c2cb43ae481baa3ef13417b1fc4b", + "value": 1125 + } + }, + "7c9fbbe9addd4d4a82a0e7f2a9410af2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"7d29075f6e25436cb7fa531b4f1b92f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03209aedabd94b9f97c7ff186d61a1b5", + "placeholder": "​", + "style": "IPY_MODEL_43b109811c7d42089713ad5c327afc9d", + "value": " 1.12k/1.12k [00:00<00:00, 39.0kB/s]" + } + }, + "7fa486f7cf6e41668382b57979928ecd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "814a8be16bbd4c499b23e931155c6169": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "818222eaa6d64018b9058bcf6531b658": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2e3b3d799b5b461d91fb4b2fa64ea7be", + "placeholder": "​", + "style": "IPY_MODEL_c41b65885a7b46d8b205b7db8e123cf4", + "value": "Downloading (…)/main/tokenizer.json: 100%" + } + }, + "83ad5f094e684a33b03a28fb7b54f1cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_818222eaa6d64018b9058bcf6531b658", + "IPY_MODEL_28085d8a3b4341e5bac2ce7efd9d89d5", + "IPY_MODEL_ca32d31fb99e4b5990ba6fd33d3e1915" + ], + "layout": "IPY_MODEL_62113a2cac0d499b9acf2a89f1993f9a" + } + }, + "865eeaa12f9d4ecbb5e38b2b3baaa4cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_feefc865187648db9956cafc5914b123", + "IPY_MODEL_eb27214d49314527aa99ab65e62ac529", + "IPY_MODEL_a87f3e961e0d486d81bebec195b396a5" + ], + "layout": "IPY_MODEL_01073391c27d455898ddec5e5b613840" + } + }, + "876761d92c4a47558604f24826fbf276": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88b58ed1580c4cf195963010c20d5454": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88c37802c3914ae6ab3e2cff32cfbe87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3612d9a6e93348d6b7b98ca7d611eec4", + "IPY_MODEL_78188eb50348434e92dc947f6baae899", + "IPY_MODEL_7d29075f6e25436cb7fa531b4f1b92f0" + ], + "layout": "IPY_MODEL_88b58ed1580c4cf195963010c20d5454" + } + }, + "8b28758bfe16428ca933c9100b7a8b29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": 
null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8bc9ac5c49a445e5b341513efaf58a83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "903c2a0ea90043d5ab9c6812ee118c1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1a774659596145c48dfd1703664ffbaa", + "placeholder": "​", + "style": "IPY_MODEL_1c9c8492343e4a86b3977b41abf2c91c", + "value": "Downloading (…)"spiece.model";: 100%" + } + }, + "92fb3795816548ffb336749cf590d335": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "949e1ca0688f4df39c6f0aee139a8a4b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "983d2b1c0515441db135aae6dd217c41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9a369f1da9f94552960bb42bc895fb4a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_227455685ef746a4845020529c86aca2", + "placeholder": "​", + "style": "IPY_MODEL_d65137c7ad444b38a2b8fcd1d36c1528", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "a1e32f35ab1c4014aa6903ef043b469c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a87f3e961e0d486d81bebec195b396a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_876761d92c4a47558604f24826fbf276", + "placeholder": "​", + 
"style": "IPY_MODEL_33fc1d4498574c1e86e7c336ab3c4a9d", + "value": " 990M/990M [00:28<00:00, 32.0MB/s]" + } + }, + "aca1b6be80124fd0999892577aee9f1e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b263a25a96f547218983b9e62f2b841c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_43f709c83c424926b92e36acc3c95e1a", + "max": 2201, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_3dce9197ad544ff2be48248640298d38", + "value": 2201 + } + }, + "c3f48d9f38b8419aae37d33b4968c2f5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c41b65885a7b46d8b205b7db8e123cf4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c4b3a987b2eb4d81a209fe62f1f00459": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c50b53014ab44ef4b196b1a79c1ad61c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6249ce38c8f437f9234faa7081743d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6da8ecbbf374f0d84e8704546a30c27": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + 
"visibility": null, + "width": null + } + }, + "ca32d31fb99e4b5990ba6fd33d3e1915": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4cfd63abaee74a1babed15ecc1ee834a", + "placeholder": "​", + "style": "IPY_MODEL_14f3ee8a6fa943178e949c45baa7683f", + "value": " 2.42M/2.42M [00:01<00:00, 1.53MB/s]" + } + }, + "ce7a65dfe8a04e29b8512044fe994b87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03c75c2c3a674154aa1370081c8d2d0c", + "max": 1016, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_473e0749bada493b90253b7c0a816e59", + "value": 274 + } + }, + "cf90c2cb43ae481baa3ef13417b1fc4b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d0381de0ca3a4359a0d2c393e9f64f69": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ddf56a6653304256bb61c8b69710fbec", + "placeholder": "​", + "style": "IPY_MODEL_0ae446f572cd4bc5b6ac64e5f1aff216", + "value": " 2.36k/2.36k [00:00<00:00, 111kB/s]" + } + }, + "d07b4780b79340c8950e3f12c4d70820": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d65137c7ad444b38a2b8fcd1d36c1528": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ddf56a6653304256bb61c8b69710fbec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e33fd695d0af44dcb068cd168190ec03": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": 
null, + "top": null, + "visibility": null, + "width": null + } + }, + "e639d6f2dafd4897a9a5df658cdf68b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_161a4ed9fcd04fee984704a6666f5399", + "placeholder": "​", + "style": "IPY_MODEL_8bc9ac5c49a445e5b341513efaf58a83", + "value": " 273/1016 [2:01:10<8:24:02, 40.70s/it]" + } + }, + "eb27214d49314527aa99ab65e62ac529": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04eee7ef7947484c9a2fb9bb6ff14eec", + "max": 990446387, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a1e32f35ab1c4014aa6903ef043b469c", + "value": 990446387 + } + }, + "f8ac6fc3cf284b50bb54c6ade26db5a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_c6249ce38c8f437f9234faa7081743d4", + "placeholder": "​", + "style": "IPY_MODEL_5d2a8c459bfc4e24be21ceef1ec86ae0", + "value": " 2.20k/2.20k [00:00<00:00, 119kB/s]" + } + }, + "feefc865187648db9956cafc5914b123": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c6da8ecbbf374f0d84e8704546a30c27", + "placeholder": "​", + "style": "IPY_MODEL_7fa486f7cf6e41668382b57979928ecd", + "value": "Downloading (…)"pytorch_model.bin";: 100%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/data-augmentation/writing-prompt/README.md b/notebooks/data-augmentation/writing-prompt/README.md new file mode 100644 index 00000000..4872a8e8 --- /dev/null +++ b/notebooks/data-augmentation/writing-prompt/README.md @@ -0,0 +1,17 @@ +# Writing Prompt + +Writing prompt folder has a notebook that entails the pipeline to take samples +of [Writing Prompt](https://www.kaggle.com/datasets/ratthachat/writing-prompts) +dataset and augment that collection with some small transformations into a +prompt, having the same story as a response. + +This process required the summarization of text that was executed by one A100 +GPU running [T5](pszemraj/long-t5-tglobal-base-16384-book-summary) model. + +The sample created was delivered at +[Hugging Face dataset](https://huggingface.co/datasets/fabraz/writingPromptAug/), +where you will find more details. + +## Contributing + +Feel free to contribute to this notebook. 
diff --git a/notebooks/data-augmentation/writing-prompt/writing_prompt.ipynb b/notebooks/data-augmentation/writing-prompt/writing_prompt.ipynb new file mode 100644 index 00000000..e40305da --- /dev/null +++ b/notebooks/data-augmentation/writing-prompt/writing_prompt.ipynb @@ -0,0 +1,3619 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# writing prompt augmentation data task" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/writing-prompt/writing_prompt.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The goal of this task was to auto-generate question/answer samples from writingPrompts to feed Open Assistant. To do that, we had to standardize the way prompts are written. We chose to define prompt templates, which make the generation process feasible. 
Here are the templates we applied:\n", + "\n", + "* Base template: every prompt would have this sample.\n", + "> User: write me a story about: {stripped_prompt} -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\n", + "\n", + "where `stripped_prompt` is the cleaned prompt, produced by a regex pattern that removes the parts of a prompt that would not fit the template, and `story` is the actual answer to a prompt.\n", + "\n", + "* General constraints: a prompt in which a constraint was found by a regex pattern would also have this sample.\n", + "> Base template, {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\n", + "\n", + "where `stripped_constraint` is the constraint found.\n", + "\n", + "* Answer beginning constraints: this constraint was imposed by the way the answer should start. 
\n", + "> Base template, starting with: {beginning} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: {beginning}:\\n{story}\n", + "\n", + "where `beginning` is the first sentence of a story.\n", + "\n", + "* Answer end constraints: this constraint was imposed by the way the answer should end. \n", + "> Base template, ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending with: {ending}\\n{story}\n", + "\n", + "where `ending` is the last sentence of a story.\n", + "\n", + "* Answer middle constraints: this constraint was imposed by what the answer should have in its middle text. \n", + "> Base template, where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\n", + "\n", + "where `middle` is a summary, produced by a generative model, of the story without its first and last sentences.\n", + "\n", + "To get the samples we used the following pipeline:\n", + "\n", + "* **Get data**: download from Kaggle\n", + "* **Pre-processing**: load the source/target files (i.e. prompt/story) for every split (train/valid/test), merge them into one pandas dataframe, and enhance it with tabular info about the sample tags.\n", + "* **Triage prompts**: we picked prompts sorted by frequency and built regex patterns for some of them to extract a stripped prompt and the related constraint.\n", + "* **Split stories**: after removing each story's beginning and ending sentences, we applied a sliding window over sentences to get summaries of the stories' middle parts." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get data from Kaggle\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# helper functions\n", + "import json\n", + "\n", + "\n", + "def save_credentials(d):\n", + " with open(\"/root/.kaggle/kaggle.json\", \"w\") as outfile:\n", + " json.dump(d, outfile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mv: cannot stat '/mnt/home/fabraz/kaggle.json': No such file or directory\n" + ] + } + ], + "source": [ + "# uncomment the following instructions, in case you want to save a .kaggle.json\n", + "# d = {}\n", + "# d['username'] = 'user'\n", + "# d['key'] = 'key'\n", + "#!mkdir ~/.kaggle\n", + "# save_credentials(d)\n", + "!mv ~/kaggle.json ~/.kaggle/\n", + "!chmod 600 ~/.kaggle/kaggle.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/bin/bash: kaggle: command not found\n" + ] + } + ], + "source": [ + "!kaggle datasets download -d ratthachat/writing-prompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Archive: writing-prompts.zip\n", + " inflating: writingPrompts/README \n", + " inflating: writingPrompts/test.wp_source \n", + " inflating: writingPrompts/test.wp_target \n", + " inflating: writingPrompts/train.wp_source \n", + " inflating: writingPrompts/train.wp_target \n", + " inflating: writingPrompts/valid.wp_source \n", + " inflating: writingPrompts/valid.wp_target \n" + ] + } + ], + "source": [ + "!unzip writing-prompts.zip" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre-processing" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from IPython.display import display, HTML" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# helper functions\n", + "import re\n", + "\n", + "\n", + "def load_file(path, names):\n", + " with open(path, \"r\") as f:\n", + " lines = f.readlines()\n", + " return pd.DataFrame(lines, columns=names)\n", + "\n", + "\n", + "def load_data():\n", + " tags = {\n", + " \"WP\": \"Writing Prompt\",\n", + " \"SP\": \"Simple Prompt\",\n", + " \"EU\": \"Established Universe\",\n", + " \"CW\": \"Constrained Writing\",\n", + " \"TT\": \"Theme Thursday\",\n", + " \"PM\": \"Prompt Me\",\n", + " \"MP\": \"Media Prompt\",\n", + " \"IP\": \"Image Prompt\",\n", + " \"PI\": \"Prompt Inspired\",\n", + " \"OT\": \"Off Topic\",\n", + " \"RF\": \"Reality Fiction\",\n", + " }\n", + "\n", + " dfConcat = pd.DataFrame()\n", + " for split in [\"train\", \"valid\", \"test\"]:\n", + " df = load_file(f\"writingPrompts/{split}.wp_source\", [\"prompt\"])\n", + " for tag in tags.keys():\n", + " df[tag.lower()] = df[\"prompt\"].map(lambda x: check_tag(x, tag.lower()))\n", + " df[\"tagCounter\"] = df.iloc[:, [2, -1]].sum(axis=1)\n", + " df[\"splitLineIndex\"] = df.index\n", + " story = load_file(f\"writingPrompts/{split}.wp_target\", [\"story\"])\n", + " df[\"story\"] = story[\"story\"]\n", + " df[\"split\"] = split\n", + " dfConcat = pd.concat([dfConcat, df])\n", + " return dfConcat\n", + "\n", + "\n", + "def check_tag(item, tag):\n", + " r = re.compile(r\"[\\(\\{\\[]\\s*[\\w]{2}\\s*[\\]\\}\\)]\\s*\")\n", + " m = r.findall(item.lower())\n", + " if len(m) > 0:\n", + " for group in m:\n", + " if tag in group:\n", + " return 1\n", + " return 0\n", + "\n", + "\n", + "def show_data(df):\n", + " html_string = \"<\"\n", + " html_string 
+= \"html><\"\n", + " html_string += \"head>HTML Pandas Dataframe with CSS<\"\n", + " html_string += 'link rel=\"stylesheet\" type=\"text/css\" href=\"df_style.css\"/'\n", + " html_string += \"><\"\n", + " html_string += \"\"\"body>\n", + " {table}\n", + " \n", + " \"\n", + " df = df.replace(\"\\|\\< newline \\>|\\\", \"\\n\", regex=True)\n", + " df.style.set_properties(**{\"text-align\": \"left\"}).set_table_styles(\n", + " [dict(selector=\"th\", props=[(\"text-align\", \"left\")])]\n", + " )\n", + " html = df.to_html()\n", + " html_string = html_string.format(table=html)\n", + " html_string = (\n", + " html_string.replace(r\"\\n\", \"
\")\n", + " .replace(\"\", '')\n", + " .replace(\"\", '')\n", + " )\n", + " display(HTML(html_string))\n", + "\n", + "\n", + "def get_samples(df, n, constraint=None, show=True):\n", + " samples = zip(df[\"prompt\"].iloc[:n, 0].index, df[\"prompt\"].iloc[:n, 0], df[\"story\"].iloc[:n, 0])\n", + " df = pd.DataFrame(samples, columns=[\"index\", \"prompt\", \"story\"])\n", + " if constraint is not None:\n", + " df = df[df[\"prompt\"].str.contains(constraint)]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ WP ] Leonardo DiCaprio in a fit of rage begins to torpedo his own career by deliberately acting poorly and taking on bad films . He finally wins an oscar for starring in Paul Blart : Mall Cop 3 .\n", + "[ CW ] Kill the writer in first-person narrative .\n" + ] + } + ], + "source": [ + "!head -n2 writingPrompts/test.wp_source" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "ds = load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
promptwpspeucwttpmmpippiotrftagCountersplitLineIndexstorysplit
0[ WP ] You 've finally managed to discover the...1000000000000So many times have I walked on ruins , the rem...train
1[ WP ] The moon is actually a giant egg , and ...1000000000001-Week 18 aboard the Depth Reaver , Circa 2023-...train
2[ WP ] You find a rip in time walking through ...1000000000002I was feckin ' sloshed , mate . First time I e...train
\n", + "
" + ], + "text/plain": [ + " prompt wp sp eu cw tt pm \\\n", + "0 [ WP ] You 've finally managed to discover the... 1 0 0 0 0 0 \n", + "1 [ WP ] The moon is actually a giant egg , and ... 1 0 0 0 0 0 \n", + "2 [ WP ] You find a rip in time walking through ... 1 0 0 0 0 0 \n", + "\n", + " mp ip pi ot rf tagCounter splitLineIndex \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 1 \n", + "2 0 0 0 0 0 0 2 \n", + "\n", + " story split \n", + "0 So many times have I walked on ruins , the rem... train \n", + "1 -Week 18 aboard the Depth Reaver , Circa 2023-... train \n", + "2 I was feckin ' sloshed , mate . First time I e... train " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(303358, 16)\n" + ] + } + ], + "source": [ + "print(ds.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['splitLineIndex', 'prompt', 'story', 'split'], dtype='object')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds[ds[\"split\"] == \"test\"].iloc[:2, [13, 0, 14, -1]].columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Samples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Train" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] You 've finally managed to discover the secret to immortality . Suddenly , Death appears before you , hands you a business card , and says , `` When you realize living forever sucks , call this number , I 've got a job offer for you . ''
So many times have I walked on ruins , the remainings of places that I loved and got used to.. At first I was scared , each time I could feel my city , my current generation collapse , break into the black hole that thrives within it , I could feel humanity , the way I 'm able to feel my body.. After a few hundred years , the pattern became obvious , no longer the war and damage that would devastate me over and over again in the far past was effecting me so dominantly .
It 's funny , but I felt as if after gaining what I desired so long , what I have lived for my entire life , only then , when I achieved immortality I started truly aging .

5 world wars have passed , and now they feel like a simple sickeness that would pass by every so often , I could no longer evaluate the individual human as a being of its own , the importance of mortals is merely the same as the importance of my skin cells ; They are a part of a mechanism so much more advanced , a mechanism that is so dear to my fallen heart a mechanism that I have seen fall and rise so many times , a mechanism that when lost all of which it had , had me loosing my will to live , for the first time in all of my thousands years of existence .

Acceptance , something so important . a skill that has proved itself worthy dozens of times , an ability that looks so easy to achieve , a gift , that I was n't able to aquire in all my years , until now . When the ashes on the ground flew into the now empty air upon humanity 's fall , I felt as if all of it 's weight was crushing me . Ignorance took over and I searched years for a hope , a sign of the very same patterns that I used to watch reappear every hundred years , the very core of my will to exist that was now no more that I so strongly wish was .

If you have ever wondered if silence can drive people crazy , it can..
I ca n't feel my legs , I have walked for days , just to hear the sound of gravel , crushed bones , crushed buildings and crushed civilizations under my steps to keep my sanity.. until I remembered , the day in my far past . The day of my rebirth , I took out of my pocket a small plastic box , with nine buttons and a small glass window . I could n't believe this was our past , I could n't believe how far we have been able to progress and yet , be destroyed by our own violence .
I slowly dialed the number I was given , exactly 1729 years ago .

I dropped a tear , a tear that was too slow to hit the ground as I got sucked into the darkness that emerged around me .

A chill went through my spine as I saw my destiny rise above me , I could see the white teeth under the dark cloack ...

`` You have finally arrived '' He projected into my mind , with the most chilling cold and unhuman voice .

`` I 'm ready to obey '' I answered . I knew who was sitting infront of me , and it was time for me to obey him , after all these years of playing god , even I came to it .

Funny is n't it ? Even by achieving immortality , death , is inescapable .
train
11[ WP ] The moon is actually a giant egg , and it has just started to hatch .
-Week 18 aboard the Depth Reaver , Circa 2023-

I walk about the dull gray halls , the artificial gravity making my steps feel almost as if they were on land . Almost . I glance out a window as I pass it by . There 's the sun , and there 's the moon right there . And , of course , there 's the Earth . I kinda miss it . Then again , space is pretty cool . It 's got some brilliant views , and the wifi is surprisingly good . Even countless miles away from the Earth , I can crush Silver noobs on CS GO .

I pass by Dale Malkowitz , the head scientist on board .

`` Evening , Dale , '' I say .

`` What up , Danny ? '' he replies cordially .

`` Nothin ' much . A little bored , I guess . ''

He shakes his head in disbelief . `` I really , *really* do n't understand how you can be bored in space . ''

`` Well hey , '' I say slightly defensively , `` Aside from the views , it 's kinda ... dull . And empty . And stuff . ''

`` Whatever you say , Wittell , '' he says , not unkindly . Then he walks off . A few moments pass , and then I decide to look out the window right by me . As my eyes scan the inky blackness of space ( again ) , I notice something odd about the moon 's surface . It 's slightly ... cracked .

`` Hey , Malkowitz ? '' I call out , `` You might wan na check this out ! ''

He walks over to me casually , probably expecting nothing . `` What ? '' he asks , `` What do you see ? ''

I point at the moon . His brow furrows . `` Huh ... I guess there 's something up with the surface . I 'll have to look into tha- ''

Suddenly , the surface cracks a little more . We glance at each other , and then back at the moon , and then at each other again , and then back at the moon again .

`` What 's going on ? '' I ask , alarmed .

He 's silent for a minute or two , mouth hanging open . Then , he calls out : `` Janice ! Terry ! Johnny ! Get over here ! Something 's up with the moon . ''

The other crewmates enter , unsure of what to expect . As their eyes lay upon the moon 's surface cracks , they widen .

And , by coincidence , more cracks appear at that very moment . And then more . And more . And more . And more ...

Little bits of the moon begin to float away , torn free of the rest of the surface . We all stare , speechless . And then ... it happens . It *happens* .

The side of the moon facing us is ... torn away by a ...

Human ... hand ?

And we see ...

A giant ... human face ? !

Surprisingly , I can hear my thoughts over my racing heart . *I ca n't help but feel as if I recognize that face ... from the ... *

*Internet . *

Suddenly , the great face 's lips move .

Of course , none of us can actually *hear* it speak , because of the laws of space and whatnot . However , I can read its lips , and it appears to be saying :

`` Are you sure about that ? ''
train
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"train\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Valid" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] Every person in the world undergoes a `` goodness '' test . It 's designed to give a score from 1 to 200 , where 1 is pure evil , and 200 is an angel in human body . Then the world is divided into 200 zones , where people can live among their own kind .
Clancy Marguerian , 154 , private first class of the 150+ army , sits in his foxhole . Tired cold , wet and hungry , the only thing preventing him from laying down his rifle and walking towards the enemy lines in surrender is the knowledge that however bad he has it here , life as a 50-100 POW is surely much worse . He 's fighting to keep his eyes open and his rifle ready when the mortar shells start landing near him .

He hunkers lower .

After a few minutes under the barrage , Marguerian hears hurried footsteps , a grunt , and a thud as a soldier leaps into the foxhole . The man 's uniform is tan , he must be a 50-100 .

The two men snarl and grab at eachother , grappling in the small foxhole . Abruptly , their faces come together .

`` Clancy ? ''

`` Rob ? ''

Rob Hall , 97 , Corporal in the 50-100 army grins , as the situation turns from life or death struggle , to a meeting of two college friends . He lets go of Marguerian 's collar .

`` Holy shit Clancy , you 're the last person I expected to see here ''

`` Yeah ''

`` Shit man , I did n't think I 'd ever see 'Mr . volunteers every saturday morning at the food shelf ' , not after The Reorganization at least ''

`` Yeah Rob , it is something is n't it ''

`` Man , I 'm sorry I tried to kill you there , hey , I heard you guys were out of food , here , you can share my dinner ''

Clancy marvels , even after all this : The Reorganization , the coalitions , the war , Rob is still his old , chatty self .

The two men sit , Rob chatting away , Clancy forcing out pleasantries . They pass Rob 's rations between them .


`` Clancy my man , I heard a group of terrorist 5 's took have formed some kind of cult , and they 're rallying all the < 50 in their own coalition ''

`` Oh yeah ? ''

`` Yeah , I mean , that sucks and everything , cause those are some scary dudes , but I heard that there 's going to be a truce between our countries in a few days , why do n't we just hang out here , pretty soon we wo n't even be enemies anymore ! ''

`` Yeah , Rob , that sounds like a plan ''

`` Man , I 'm so glad I found you again , in a few days , this war will be over , and things will be cool between us and , hey , remember Sarah ? I heard she 's a 151 , maybe I 'll look her up , I 'll be sure to visit you too once I can get a pass to sector 150-155 , it 'll probably be tough though , even before the war , you had to do sooo much paperwork to be allowed to visit , I wonder if passes will even be reinstated after the truce ends , hey , did I ever tell you about the time ... ''

Rob babbles as he dozes off , grinning up at Clancy .

When Clancy is sure that his friend is asleep , he slits Rob 's throat with his bayonet . Clancy climbs out of the foxhole , and stumbles his way back to battalion HQ .
valid
11[ WP ] Space mining is on the rise . The Space tanker Exxon Valdez 2.0 crash and spill its cargo . Write a news story covering the event .
„… and the little duckling will never be able to walk again. ”

The artificial intelligence paused a moment for dramatic effect before continuing with its broadcast with a different voice .

“ What a hearth breaking story , Frank . But now to another story that may leave you feel equally dirty . The automated space tanker Exxon Valdez 2.0 collided with an asteroid on its way to the Jupiter moon Ganymede . According to the ship owner the ship is out of control and leaking its content into space. ”

“ That ’ s right , Fred . And the content of the ship has it in it , as they say ” , the computer said in first voice again , “ The whole tanker was filled with ‘ biological waste products ’ coming from research and mining stations in the Kuiper Belt. ”

“ Biological waste products ? You don ’ t mean ... ”

“ Yes , Fred ! ” Dramatic pause . “ I am talking about poop . Lots of it . And apparently it ’ s spilling everywhere. ”

“ Better call the plumbers , Frank. ”

“ Not any time soon , Fred . A spokesperson of the ship owner stated and I quote – ‘ Space is kind of big and empty , we expect no one to care , so why should we ? ’ Apparently they will just build a new ship and be done with it. ”

“ That ’ s one way not to deal with the problem . But why doesn ’ t the ship fly home ? Shouldn ’ t the AI on board be able to handle such a problem ? ”

“ Well , the issue is that the part in charge to deal with asteroid impacts like that has been impacted by the asteroid. ”

“ Ouch . Talk about a bad run. ”

“ True , especially if you take the name of the ship in consideration. ”

“ Oh ? Exxon Valdez 2.0 it was , isn ’ t that right , Frank ? ”

“ You ’ re absolutely right , Fred . Did you know the ship was named after an infamous ship of the twentieth century back on old Earth ? Apparently the Exxon Valdez of old was used for transporting petroleum across the oceans of Earth . Petroleum , as some of our listeners might not know , was a brownish black , gooey liquid comprised of biological matter which was transformed under high pressure for millions of years . Quite ironically the Exxon Valdez was infamous for crashing and spilling its cargo. ”

“ Well , talk about making a bad name for yourself . Now both ships will go down in history for spilling black gooey stuff where it doesn ’ t belong . Who had that bright idea for such a name anyway ? ”

“ Well , Fred , the company made its first plunder by holding a naming contest on the internet. ”

“ Oh , will they ever learn ? ”

“ Apparently not , Fred . Predictably someone tried to make a joke out of it . A niche side of history role players got wind of the contest and made it its goal to get it named after the infamous Exxon Valdez . Apparently they thought it would be funny , and given the content both ships were ferrying around , they might have a point. ”

“ Funny , indeed , Frank . What ’ s the name of the side ? ”

“ Well , Fred , it ’ s called Reddit . The people there mostly talk in outdated lingo and memes and watch cat pictures back from a time when the internet only was local on Earth. ”

“ Truly a herald of the dark ages. ”

“ You might be right about that , Fred . I assume they just thought it was funny . I guess this happens , when you let the internet decide on things. ”

“ Well , Frank , when you think about the content both ships were ferrying around , they might have been right . Embarrassing for the company , but funny for everyone else. ”

“ It might get worse than that , Fred . Environmentalists are up in arms . They claim that the human waste products spilling out of the ship might collide with Jupiter ’ s moon Europa within the next few millennia and might contaminate the biospheres with Earth life . Apparently there are a lot of bacteria and the likes in poop and some might be able survive the harsh conditions of space and end up impacting on the restricted moon. ”

“ Oh dear , Frank , does the Monolith know about it yet ? I am sure it won ’ t let us hear the end of it. ”

valid
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"valid\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Test" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "HTML Pandas Dataframe with CSS\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
splitLineIndexpromptstorysplit
00[ WP ] Leonardo DiCaprio in a fit of rage begins to torpedo his own career by deliberately acting poorly and taking on bad films . He finally wins an oscar for starring in Paul Blart : Mall Cop 3 .
The wet marble floor pressed on his cheek like a thousand hands slapping his face frozen in time . Smattering piss of rain ignored his indignant mumblings . His eyes fluttered . Pins and needs ran from finger to shoulder as he pushed back against the floor , contorting his aching body into a cross legged position . Last night was bad . He gathered that . His routine dullness of though crept inwards from the edges of his mind toward the black mist that veiled his most recent memories . He struggled to recall whatever he could n't recall but only for a moment before he decided it probably was n't worth the effort .
He glanced around the room for a few minutes before concluding that he probably did n't know where he was . His investigation was n't entirely fruitless , he discovered a mostly full bottle of vodka . It was cheap but would definitely get the job done . Taking a few swigs made it childishly easy to ignore that gigantic black cloud of fog blotting out whatever the hell he did before he woke up .
There was a mirror in the room and for want of anything more interesting to study he gazed at himself . It was a game he 'd play with himself , glancing at the mirror and seeing if he could recognize the person looking back . If he did n't know better he 'd have guessed he was a very successful mattress salesman , or perhaps a bum who had managed to score some luck gambling .
His face was portly and unshaven , in that limbo place where it had been too many days without being clean and too few days to become a beard . His stomach was round but firm , like a basketball stuffed under a shirt and then semi deflated . The hair was long and unruly , receding far into the past . But his eyes were the giveaway . Looking closely enough at them he could still see an intensity . It was n't the sharp kind he carried in his youth but rather like a rusted dagger . Still sharp enough to cut .
`` DiCaprio . '' The curse rasped out of him in a choke . After all these years spent working on the hallmark channel and tv series based on mediocre movies he was still there . Despite his best efforts to bury himself under all of the alchol and drugs he was still in there . He thought for sure after the bankruptcy he 'd be done , but no that god damned rerelease of Titanic the royalties started pouring in and he could n't get rid of the money . Not even the live action version of the nut job could destroy him .
Cursing he hurled the bottle at the mirror but his wet hands slipped and instead of a shattering crash there was only a thud as the bottle bounced off the dry wall and rolled on the floor .
His rage thwarted by his impotence he slumped against the floor and finally noticed why there was rain coming into this room .

The window was smashed . He looked at the bottle , confused . No , he had n't done that . At least not with the vodka . He looked back at the glass etched around the window sill and his eyes hung on the red that stained the jagged teeth .

The headache crept back towards the front of his mind while the bloody glass pinned his eyes in place . What the fuck happened last night ?
test
11[ CW ] Kill the writer in first-person narrative .
It 's been three days since my boyfriend pissed off the neighbors .

They had to be pissed , he called the police on them . The neighbors had been harboring a runaway criminal . We did n't live in a bad neighborhood , there were families and good people living here with solid steady jobs . They cared about their yards and such . But , there was a bad egg , our neighbors to the south of us were shady . We could hear them yelling at their dog many times a week . Strange smoke often came out of their house , and the lights in the garage were on at odd hours . We never had proof until now that our concerns are legitimate .

The car the escaped criminal was driving had been parked at the neighbor 's house and my boyfriend decided he should turn them in . This lead to the police parking in front of *our* house , and watching them through our bedroom window for hours until they caught him . They had to know it was us . And it freaked me out .

I had started tucking my pink taser in my jacket pocket when I took my miniature Yorkie out to go potty . My neighbor to the north , Jay , seemed to notice my tension , so when he saw me step outside , he 'd come out and chat with me . He 'd ask me about work , and talk to me about his latest construction jobs . Jay always pretend to be grabbing something out of his massive pick-up truck . It usually followed the same pattern - he grabs something out of his truck , sees me out with my dog , then starts in on how it baffles him how such a tiny dog was smarter than most of the people he worked with . We 'd both gripe about our jobs and laugh about stupid customers , chase the puppy down when she tried to go after squirrels , and then part ways until the next potty break .

The sun was beginning to set when my dog started doing her potty dance by the door . I put on my jacket , slipped my taser in my pocket , and opened the door . She bolted out the door and went straight for the squirrel sniffing around the sidewalk .

`` NO ! BAD GIRL , COME HERE ! '' The squirrel started running across the road and her tiny legs skittered out of it . I ran after her , swearing as I tripped over a crack in the road . I felt a snap in my ankle and I went down .

The roar of a large pick-up engine was too close and I did n't know what to look at - my little dog bouncing across the neighbor 's lawn , or the tires that were n't slowing down fast enough . I chose neither and closed my eyes .

The last thing I heard was the clatter of of work boots and Jay voice cracking , `` Oh god , oh god , oh god ... ''


test
\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_data(ds[ds[\"split\"] == \"test\"].iloc[:2][[\"splitLineIndex\", \"prompt\", \"story\", \"split\"]]);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Augmentation " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Triage Prompts\n", + "\n", + "1. Take the prompts list order by frequency\n", + "2. Define regex patterns for prompt and constraint\n", + "3. Generate prompts" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "df_rep = ds.groupby([\"prompt\", \"split\"]).size().reset_index().rename(columns={0: \"records\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "df_rep = df_rep[df_rep[\"records\"] > 20].sort_values([\"records\"], ascending=False)\n", + "# _str = df_rep[df_rep['records']>20].sort_values(['records'], ascending=False).iloc[1,0]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "topPrompts20Reps = df_rep[df_rep[\"records\"] > 20].sort_values([\"records\"], ascending=False)[\"prompt\"].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['[ WP ] Write the letter that you always wanted to , but never did .\\n',\n", + " \"[ WP ] There is no prompt . Just write a story you 've always been thinking about or one you 've been thinking about sharing . 
Anything goes .\\n\",\n", + " \"[ WP ] This is the prologue ( or the first chapter ) of the novel you 've always wanted to write .\\n\",\n", + " '[ WP ] Write a short story where the first sentence has 20 words , 2nd sentence has 19 , 3rd has 18 etc . Story ends with a single word .\\n',\n", + " \"[ WP ] Killing Hitler has become a sport amongst time travelers . Points are awarded for creativity and difficulty . You are last year 's champion , how did you win ?\\n\"]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "topPrompts20Reps[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# df_rep[df_rep[\"split\"] == \"valid\"].iloc[1:3, 0]\n", + "# topPrompts20Reps += df_rep[df_rep[\"split\"] == \"valid\"].iloc[1:3, 0].to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We found 1015 prompts having more than 20 stories\n" + ] + } + ], + "source": [ + "print(f\"We found {len(topPrompts20Reps)} prompts having more than 20 stories\")" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "PROMPT_PATTERNS = \"(Lucifer\\snever[\\s\\w,]+)|\\\n", + "([\\. \\w,]+)\\.\\s+Tell me|\\\n", + "(All injuries[\\. \\w,]+)\\.|\\\n", + "(? 
0:\n", + " return m.group(0)\n", + " return None\n", + "\n", + "\n", + "from spacy.lang.en import English\n", + "\n", + "\n", + "def get_sentences(_str):\n", + " chunks = _str.split(\"\\n\")\n", + " sentences = []\n", + " nlp = English()\n", + " nlp.add_pipe(\"sentencizer\")\n", + " for chunk in chunks:\n", + " doc = nlp(chunk)\n", + " sentences += [sent.text.strip() for sent in doc.sents]\n", + " return sentences\n", + "\n", + "\n", + "from itertools import islice\n", + "\n", + "\n", + "def window(seq, n=2):\n", + " it = iter(seq)\n", + " result = tuple(islice(it, n))\n", + " if len(result) == n:\n", + " yield \" \".join(result)\n", + " for elem in it:\n", + " result = result[1:] + (elem,)\n", + " yield \" \".join(result)\n", + "\n", + "\n", + "def extract_story_parts(story):\n", + " sentences = get_sentences(story)\n", + " beginning = sentences.pop(0)\n", + " middles = window(sentences, 4)\n", + " ending = sentences.pop(-1)\n", + " return beginning, middles, ending\n", + "\n", + "\n", + "def clear_prompt(prompt):\n", + " return re.sub(r\"^[Ww]rite \", \"\", prompt)\n", + "\n", + "\n", + "def get_sample_dict(split, id, text):\n", + " return {\"split\": split, \"splitLineIndex\": id, \"text\": text}\n", + "\n", + "\n", + "def generate_instruction_diologs(df):\n", + " dialogs = []\n", + " \"\"\"User: What is this story about: {story} -> Rosey: I think it's about: {striped_prompt}\"\"\"\n", + " dialogBase = \"\"\"User: write me a story about: {stripped_prompt}\"\"\"\n", + " dialog1 = \"\"\" -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\"\"\"\n", + " dialog2 = \"\"\", {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\"\"\"\n", + " dialog3 = \"\"\", starting with: {beggining} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: {beggining}:\\n{story}\"\"\"\n", + " dialog4 = \"\"\", ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending 
with: {ending}\\n{story}\"\"\"\n", + " dialog5 = \"\"\", where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\"\"\"\n", + "\n", + " df_rep = df.groupby([\"prompt\"]).size().reset_index().rename(columns={0: \"records\"})\n", + " df_rep.sort_values([\"records\"], ascending=False, inplace=True)\n", + " pbar = tqdm()\n", + " pbar.reset(total=len(df_rep))\n", + " for prompt in df_rep.iloc[:, 0]:\n", + " strippedPrompt = extract_prompt_parts(prompt, PROMPT_PATTERNS)\n", + " if strippedPrompt is None:\n", + " continue\n", + " strippedPrompt = clear_prompt(strippedPrompt)\n", + " strippedConstraint = extract_prompt_parts(prompt, CONST_PATTERNS)\n", + "\n", + " for row in df[df[\"prompt\"] == prompt].itertuples():\n", + " try:\n", + " story = (\n", + " row.story.replace(\"\", \"\\n\")\n", + " .replace(\"< newline >\", \"\\n\")\n", + " .replace(\"\", \"\\n\")\n", + " .strip()\n", + " )\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " dialogBeg = dialogBase.format(stripped_prompt=strippedPrompt)\n", + " dialog = dialogBeg + dialog1.format(story=story, stripped_prompt=strippedPrompt)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " if strippedConstraint is not None:\n", + " dialog = dialogBeg + dialog2.format(\n", + " stripped_prompt=strippedPrompt, stripped_constraint=strippedConstraint, story=story\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " dialog = dialogBeg + dialog3.format(stripped_prompt=strippedPrompt, story=story, beggining=beginning)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " dialog = dialogBeg + dialog4.format(stripped_prompt=strippedPrompt, story=story, ending=ending)\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " middlesSumarizered = summarizer(middles, **params)\n", + " for 
middle, sumarizedMiddle in zip(middles, middlesSumarizered):\n", + " # dialogs.append(dialogBeg + dialog5.format(stripped_prompt=strippedPrompt, story=story, middle=middle))\n", + " dialog = dialogBeg + dialog5.format(\n", + " stripped_prompt=strippedPrompt, story=story, middle=sumarizedMiddle[0][\"summary_text\"]\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitIndex, dialog))\n", + " pbar.update()\n", + " except Exception as e:\n", + " print(f\"{row.split}/{row.splitIndex}\")\n", + " raise e\n", + " pbar.refresh()\n", + " return dialogs\n", + "\n", + "\n", + "def filter_data(\n", + " dataset,\n", + " negativeTagFilter=None,\n", + " positiveTagFilter=None,\n", + " patternFilter=None,\n", + "):\n", + " \"\"\"\n", + " > filter_data(dataset['train'],negativeTagFilter=['ip'], positiveTagFilter=['pm'] )\n", + " \"\"\"\n", + " prompt = dataset[\"prompt\"]\n", + " if negativeTagFilter is not None:\n", + " prompt = prompt[(prompt[negativeTagFilter] < 1).any(axis=1)]\n", + " if positiveTagFilter is not None:\n", + " prompt = prompt[prompt[positiveTagFilter].gt(0).all(axis=1)]\n", + " if patternFilter is not None:\n", + " prompt = prompt[prompt[\"prompt\"].str.contains(patternFilter)]\n", + " story = dataset[\"story\"]\n", + " story = story.iloc[prompt.index]\n", + " return {\"prompt\": prompt, \"story\": story}\n", + "\n", + "\n", + "def generate_instruction_diologs(prompt, df):\n", + " dialogs = []\n", + " \"\"\"User: What is this story about: {story} -> Rosey: I think it's about: {striped_prompt}\"\"\"\n", + " dialogBase = \"\"\"User: write me a story about: {stripped_prompt}\"\"\"\n", + " dialog1 = \"\"\" -> Rosey: Sure, here's a story about: {stripped_prompt}:\\n{story}\"\"\"\n", + " dialog2 = \"\"\", {stripped_constraint} -> Rosey: Sure, here's a story about: {stripped_prompt}, {stripped_constraint}:\\n{story}\"\"\"\n", + " dialog3 = \"\"\", starting with: {beggining} -> Rosey: Sure, here's a story about: {stripped_prompt}, starting with: 
{beggining}:\\n{story}\"\"\"\n", + " dialog4 = \"\"\", ending with: {ending} -> Rosey: Sure, here's a story about {stripped_prompt}: ending with: {ending}\\n{story}\"\"\"\n", + " dialog5 = \"\"\", where the middle of the story is about: {middle} -> Rosey: Sure, here's a story about: {stripped_prompt}, where the middle of the story is about: {middle}:\\n{story}\"\"\"\n", + "\n", + " strippedPrompt = extract_prompt_parts(prompt, PROMPT_PATTERNS)\n", + " if strippedPrompt is not None:\n", + " strippedPrompt = clear_prompt(strippedPrompt)\n", + " strippedConstraint = extract_prompt_parts(prompt, CONST_PATTERNS)\n", + " pbar = tqdm(ascii=True, desc=\"stories\")\n", + " pbar.reset(total=len(df[df[\"prompt\"] == prompt]))\n", + " for row in df[df[\"prompt\"] == prompt].itertuples():\n", + " try:\n", + " story = (\n", + " row.story.replace(\"\", \"\\n\")\n", + " .replace(\"< newline >\", \"\\n\")\n", + " .replace(\"\", \"\\n\")\n", + " .strip()\n", + " )\n", + " dialogBeg = dialogBase.format(stripped_prompt=strippedPrompt)\n", + " dialog = dialogBeg + dialog1.format(story=story, stripped_prompt=strippedPrompt)\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " if strippedConstraint is not None:\n", + " dialog = dialogBeg + dialog2.format(\n", + " stripped_prompt=strippedPrompt, stripped_constraint=strippedConstraint, story=story\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " if beginning is not None:\n", + " beginning, middles, ending = extract_story_parts(story)\n", + " dialog = dialogBeg + dialog3.format(\n", + " stripped_prompt=strippedPrompt, story=story, beggining=beginning\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " dialog = dialogBeg + dialog4.format(stripped_prompt=strippedPrompt, story=story, ending=ending)\n", + " dialogs.append(get_sample_dict(row.split, 
row.splitLineIndex, dialog))\n", + " middlesSumarizered = summarizer(middles, **params)\n", + " for middle, sumarizedMiddle in zip(middles, middlesSumarizered):\n", + " # dialogs.append(dialogBeg + dialog5.format(stripped_prompt=strippedPrompt, story=story, middle=middle))\n", + " dialog = dialogBeg + dialog5.format(\n", + " stripped_prompt=strippedPrompt, story=story, middle=sumarizedMiddle[0][\"summary_text\"]\n", + " )\n", + " dialogs.append(get_sample_dict(row.split, row.splitLineIndex, dialog))\n", + " pbar.update()\n", + " except Exception as e:\n", + " print(f\"{row.split}/{row.splitLineIndex}\")\n", + " raise e\n", + " pbar.refresh()\n", + " return dialogs" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generate " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It saves parquet every `step` samples to avoid losing work. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## filter dataset to take only prompts with frequency greater than 20 stories.\n", + "dialogs = []\n", + "i = 0\n", + "start = 0\n", + "step = 10\n", + "for index in range(start, len(topPrompts20Reps), step):\n", + " pbar = tqdm(ascii=True, desc=\"prompt\")\n", + " pbar.reset(total=len(topPrompts20Reps[index : index + step]))\n", + " for prompt in topPrompts20Reps[index : index + step]:\n", + " tmpDialogs = generate_instruction_diologs(prompt, ds)\n", + " if tmpDialogs is not None:\n", + " dialogs += tmpDialogs\n", + " pbar.update()\n", + " if len(dialogs) > 0:\n", + " pd.DataFrame(dialogs).to_parquet(\"writing-prompts-aug.parquet\")\n", + " pbar.refresh()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet(\"writing-prompts-aug.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for split in 
list(set(df.split)):\n", + " df_aux = df[df[\"split\"] == split].iloc[:, 1:]\n", + " df_aux.reset_index(inplace=True)\n", + " df_aux.iloc[:, 1:].to_parquet(f\"{split}.parquet\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fastai22", + "language": "python", + "name": "fastai22" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "01073391c27d455898ddec5e5b613840": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "02aff4fac4454967b80469f0774e1a6c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + 
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03209aedabd94b9f97c7ff186d61a1b5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "03c75c2c3a674154aa1370081c8d2d0c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "04eee7ef7947484c9a2fb9bb6ff14eec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "04f0d4dafcee402780ab34cfba03179e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7390362a9704413984a47a1d5b262276", + "IPY_MODEL_b263a25a96f547218983b9e62f2b841c", + "IPY_MODEL_f8ac6fc3cf284b50bb54c6ade26db5a1" + ], + "layout": "IPY_MODEL_aca1b6be80124fd0999892577aee9f1e" + } + }, + "05cf82d369674d848d9d2dd50be546ad": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": 
"", + "description_tooltip": null, + "layout": "IPY_MODEL_949e1ca0688f4df39c6f0aee139a8a4b", + "placeholder": "​", + "style": "IPY_MODEL_666605f8ef614cc5806b7e2076095746", + "value": " 27%" + } + }, + "09ad8cfb26814f979a82ac73f073d5c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_903c2a0ea90043d5ab9c6812ee118c1a", + "IPY_MODEL_664e94791b1946e1a78bfa93e9ce0b6f", + "IPY_MODEL_322330e98fc745df9b55a959392c015c" + ], + "layout": "IPY_MODEL_02aff4fac4454967b80469f0774e1a6c" + } + }, + "0ae446f572cd4bc5b6ac64e5f1aff216": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0bba8f8e7f754d1eb204db2ceab4aaab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0d209a94698d43748bccb06629b1c97a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": 
"@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "14f3ee8a6fa943178e949c45baa7683f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "161a4ed9fcd04fee984704a6666f5399": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": 
null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1924812f3b644648ae3671cb1f8f659f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + 
"width": null + } + }, + "1a774659596145c48dfd1703664ffbaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c9c8492343e4a86b3977b41abf2c91c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "20beb9b7ad504afba558ed28b6fb242b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9a369f1da9f94552960bb42bc895fb4a", + "IPY_MODEL_4f372c13f77245c49925981c33d1d611", + "IPY_MODEL_d0381de0ca3a4359a0d2c393e9f64f69" + ], + "layout": "IPY_MODEL_c50b53014ab44ef4b196b1a79c1ad61c" + } + }, + "2102cf1d8f6b4192b6d45dfbe4e5044d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "227455685ef746a4845020529c86aca2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "28085d8a3b4341e5bac2ce7efd9d89d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_92fb3795816548ffb336749cf590d335", + "max": 2422362, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3402b3a652254e90b3d6ef17dccfe90a", + "value": 2422362 + } + }, + "2e3b3d799b5b461d91fb4b2fa64ea7be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "322330e98fc745df9b55a959392c015c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7c9fbbe9addd4d4a82a0e7f2a9410af2", + "placeholder": "​", + "style": "IPY_MODEL_c4b3a987b2eb4d81a209fe62f1f00459", + "value": " 792k/792k [00:00<00:00, 10.6MB/s]" + } + }, + "33fc1d4498574c1e86e7c336ab3c4a9d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3402b3a652254e90b3d6ef17dccfe90a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3612d9a6e93348d6b7b98ca7d611eec4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1924812f3b644648ae3671cb1f8f659f", + "placeholder": "​", + "style": "IPY_MODEL_0bba8f8e7f754d1eb204db2ceab4aaab", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "3dce9197ad544ff2be48248640298d38": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "43b109811c7d42089713ad5c327afc9d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": 
"1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "43f709c83c424926b92e36acc3c95e1a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "473e0749bada493b90253b7c0a816e59": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4b9d6ee49ebd4c018d01f8a64fb112e1": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_05cf82d369674d848d9d2dd50be546ad", + "IPY_MODEL_ce7a65dfe8a04e29b8512044fe994b87", + "IPY_MODEL_e639d6f2dafd4897a9a5df658cdf68b0" + ], + "layout": "IPY_MODEL_0d209a94698d43748bccb06629b1c97a" + } + }, + "4cfd63abaee74a1babed15ecc1ee834a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4f372c13f77245c49925981c33d1d611": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8b28758bfe16428ca933c9100b7a8b29", + "max": 2361, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_983d2b1c0515441db135aae6dd217c41", + "value": 2361 + } + }, + "5d2a8c459bfc4e24be21ceef1ec86ae0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "62113a2cac0d499b9acf2a89f1993f9a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": 
null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "664e94791b1946e1a78bfa93e9ce0b6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2102cf1d8f6b4192b6d45dfbe4e5044d", + "max": 791656, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d07b4780b79340c8950e3f12c4d70820", + "value": 791656 + } + }, + "666605f8ef614cc5806b7e2076095746": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7390362a9704413984a47a1d5b262276": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_c3f48d9f38b8419aae37d33b4968c2f5", + "placeholder": "​", + "style": "IPY_MODEL_814a8be16bbd4c499b23e931155c6169", + "value": "Downloading (…)cial_tokens_map.json: 100%" + } + }, + "78188eb50348434e92dc947f6baae899": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e33fd695d0af44dcb068cd168190ec03", + "max": 1125, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cf90c2cb43ae481baa3ef13417b1fc4b", + "value": 1125 + } + }, + "7c9fbbe9addd4d4a82a0e7f2a9410af2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, 
+ "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d29075f6e25436cb7fa531b4f1b92f0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03209aedabd94b9f97c7ff186d61a1b5", + "placeholder": "​", + "style": "IPY_MODEL_43b109811c7d42089713ad5c327afc9d", + "value": " 1.12k/1.12k [00:00<00:00, 39.0kB/s]" + } + }, + "7fa486f7cf6e41668382b57979928ecd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "814a8be16bbd4c499b23e931155c6169": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "818222eaa6d64018b9058bcf6531b658": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + 
"_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2e3b3d799b5b461d91fb4b2fa64ea7be", + "placeholder": "​", + "style": "IPY_MODEL_c41b65885a7b46d8b205b7db8e123cf4", + "value": "Downloading (…)/main/tokenizer.json: 100%" + } + }, + "83ad5f094e684a33b03a28fb7b54f1cc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_818222eaa6d64018b9058bcf6531b658", + "IPY_MODEL_28085d8a3b4341e5bac2ce7efd9d89d5", + "IPY_MODEL_ca32d31fb99e4b5990ba6fd33d3e1915" + ], + "layout": "IPY_MODEL_62113a2cac0d499b9acf2a89f1993f9a" + } + }, + "865eeaa12f9d4ecbb5e38b2b3baaa4cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_feefc865187648db9956cafc5914b123", + "IPY_MODEL_eb27214d49314527aa99ab65e62ac529", + "IPY_MODEL_a87f3e961e0d486d81bebec195b396a5" + ], + "layout": "IPY_MODEL_01073391c27d455898ddec5e5b613840" + } + }, + "876761d92c4a47558604f24826fbf276": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88b58ed1580c4cf195963010c20d5454": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": 
null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "88c37802c3914ae6ab3e2cff32cfbe87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_3612d9a6e93348d6b7b98ca7d611eec4", + "IPY_MODEL_78188eb50348434e92dc947f6baae899", + "IPY_MODEL_7d29075f6e25436cb7fa531b4f1b92f0" + ], + "layout": "IPY_MODEL_88b58ed1580c4cf195963010c20d5454" + } + }, + "8b28758bfe16428ca933c9100b7a8b29": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8bc9ac5c49a445e5b341513efaf58a83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "903c2a0ea90043d5ab9c6812ee118c1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1a774659596145c48dfd1703664ffbaa", + "placeholder": "​", + "style": "IPY_MODEL_1c9c8492343e4a86b3977b41abf2c91c", + "value": "Downloading (…)"spiece.model";: 100%" + } + }, + "92fb3795816548ffb336749cf590d335": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": 
null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "949e1ca0688f4df39c6f0aee139a8a4b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "983d2b1c0515441db135aae6dd217c41": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9a369f1da9f94552960bb42bc895fb4a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_227455685ef746a4845020529c86aca2", + "placeholder": "​", + "style": "IPY_MODEL_d65137c7ad444b38a2b8fcd1d36c1528", + "value": "Downloading (…)okenizer_config.json: 100%" + } + }, + "a1e32f35ab1c4014aa6903ef043b469c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a87f3e961e0d486d81bebec195b396a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_876761d92c4a47558604f24826fbf276", + "placeholder": "​", + "style": "IPY_MODEL_33fc1d4498574c1e86e7c336ab3c4a9d", + "value": " 990M/990M [00:28<00:00, 32.0MB/s]" + } + }, + "aca1b6be80124fd0999892577aee9f1e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b263a25a96f547218983b9e62f2b841c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": 
"ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_43f709c83c424926b92e36acc3c95e1a", + "max": 2201, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3dce9197ad544ff2be48248640298d38", + "value": 2201 + } + }, + "c3f48d9f38b8419aae37d33b4968c2f5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c41b65885a7b46d8b205b7db8e123cf4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "c4b3a987b2eb4d81a209fe62f1f00459": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c50b53014ab44ef4b196b1a79c1ad61c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6249ce38c8f437f9234faa7081743d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6da8ecbbf374f0d84e8704546a30c27": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": 
null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ca32d31fb99e4b5990ba6fd33d3e1915": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4cfd63abaee74a1babed15ecc1ee834a", + "placeholder": "​", + "style": "IPY_MODEL_14f3ee8a6fa943178e949c45baa7683f", + "value": " 2.42M/2.42M [00:01<00:00, 1.53MB/s]" + } + }, + "ce7a65dfe8a04e29b8512044fe994b87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_03c75c2c3a674154aa1370081c8d2d0c", + "max": 1016, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_473e0749bada493b90253b7c0a816e59", + "value": 274 + } + }, + "cf90c2cb43ae481baa3ef13417b1fc4b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d0381de0ca3a4359a0d2c393e9f64f69": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ddf56a6653304256bb61c8b69710fbec", + "placeholder": "​", + "style": "IPY_MODEL_0ae446f572cd4bc5b6ac64e5f1aff216", + "value": " 2.36k/2.36k [00:00<00:00, 111kB/s]" + } + }, + "d07b4780b79340c8950e3f12c4d70820": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d65137c7ad444b38a2b8fcd1d36c1528": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ddf56a6653304256bb61c8b69710fbec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e33fd695d0af44dcb068cd168190ec03": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e639d6f2dafd4897a9a5df658cdf68b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_161a4ed9fcd04fee984704a6666f5399", + "placeholder": "​", + "style": "IPY_MODEL_8bc9ac5c49a445e5b341513efaf58a83", + "value": " 273/1016 [2:01:10<8:24:02, 40.70s/it]" + } + }, + "eb27214d49314527aa99ab65e62ac529": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_04eee7ef7947484c9a2fb9bb6ff14eec", + "max": 990446387, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a1e32f35ab1c4014aa6903ef043b469c", + "value": 990446387 + } + }, + "f8ac6fc3cf284b50bb54c6ade26db5a1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c6249ce38c8f437f9234faa7081743d4", + "placeholder": "​", + "style": "IPY_MODEL_5d2a8c459bfc4e24be21ceef1ec86ae0", + "value": " 2.20k/2.20k [00:00<00:00, 119kB/s]" + } + }, + "feefc865187648db9956cafc5914b123": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c6da8ecbbf374f0d84e8704546a30c27", + "placeholder": "​", + "style": "IPY_MODEL_7fa486f7cf6e41668382b57979928ecd", + "value": "Downloading (…)"pytorch_model.bin";: 100%" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/oasst-shared/oasst_shared/schemas/inference.py b/oasst-shared/oasst_shared/schemas/inference.py index a638e55c..1bb89a42 100644 --- a/oasst-shared/oasst_shared/schemas/inference.py +++ b/oasst-shared/oasst_shared/schemas/inference.py @@ -1,4 +1,5 @@ import random +from typing import Literal import pydantic @@ -13,11 +14,12 @@ class WorkRequest(pydantic.BaseModel): conversation: protocol.Conversation = pydantic.Field(..., repr=False) model_name: str = "distilgpt2" max_new_tokens: int = 100 - seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 2**31 - 1)) + seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 0xFFFF_FFFF_FFFF_FFFF - 1)) do_sample: bool = True top_k: int = 50 top_p: float = 0.9 temperature: float = 1.0 + repetition_penalty: float | None = None class TokenResponse(pydantic.BaseModel): @@ -28,6 +30,7 @@ class 
TokenResponse(pydantic.BaseModel): class GeneratedTextResponse(pydantic.BaseModel): text: str + finish_reason: Literal["length", "eos_token", "stop_sequence"] class WorkResponsePacket(pydantic.BaseModel): diff --git a/website/.env b/website/.env index 50ecb498..6c95c9a5 100644 --- a/website/.env +++ b/website/.env @@ -1,4 +1,5 @@ ADMIN_USERS = "credentials:admin,discord:root,email:admin@example.com" +MODERATOR_USERS = "credentials:mod,discord:mod,email:mod@example.com" # The database created by running the jobs in /scripts/frontend-development/docker-compose.yaml DATABASE_URL=postgres://postgres:postgres@localhost:5433/oasst_web diff --git a/website/next-i18next.config.js b/website/next-i18next.config.js index 8ae0b222..7eb7649b 100644 --- a/website/next-i18next.config.js +++ b/website/next-i18next.config.js @@ -5,9 +5,12 @@ module.exports = { "ar", "bn", "ca", + "da", "de", "en", "es", + "eu", + "fa", "fr", "hu", "it", @@ -20,6 +23,7 @@ module.exports = { "vi", "zh", "tr", + "id", ], }, }; diff --git a/website/package-lock.json b/website/package-lock.json index 82a24151..9b369be4 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -27,6 +27,7 @@ "axios": "^1.2.1", "boolean": "^3.2.0", "clsx": "^1.2.1", + "date-fns": "^2.29.3", "eslint": "8.29.0", "eslint-config-next": "13.0.6", "eslint-plugin-simple-import-sort": "^8.0.0", @@ -17710,6 +17711,18 @@ "node": ">=12" } }, + "node_modules/date-fns": { + "version": "2.29.3", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.29.3.tgz", + "integrity": "sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==", + "engines": { + "node": ">=0.11" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/date-fns" + } + }, "node_modules/dayjs": { "version": "1.11.7", "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.7.tgz", @@ -51947,6 +51960,11 @@ } } }, + "date-fns": { + "version": "2.29.3", + "resolved": 
"https://registry.npmjs.org/date-fns/-/date-fns-2.29.3.tgz", + "integrity": "sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA==" + }, "dayjs": { "version": "1.11.7", "resolved": "https://registry.npmjs.org/dayjs/-/dayjs-1.11.7.tgz", diff --git a/website/package.json b/website/package.json index d685ac0f..bb09c986 100644 --- a/website/package.json +++ b/website/package.json @@ -45,6 +45,7 @@ "axios": "^1.2.1", "boolean": "^3.2.0", "clsx": "^1.2.1", + "date-fns": "^2.29.3", "eslint": "8.29.0", "eslint-config-next": "13.0.6", "eslint-plugin-simple-import-sort": "^8.0.0", diff --git a/website/public/locales/ar/common.json b/website/public/locales/ar/common.json index b89cafe8..4749668c 100644 --- a/website/public/locales/ar/common.json +++ b/website/public/locales/ar/common.json @@ -1,27 +1,36 @@ { - "about": "حول", - "account_settings": "حساب", - "admin_dashboard": "لوحة التحكم الإدارية", - "connect": "الاتصال", - "conversational": "ذكاء تحدثي للجميع.", - "copied": "Copied", - "dark_mode": "الوضع الداكن", - "dashboard": "لوحة التحكم", + "about": "من نحن", + "account_settings": "اعدادات", + "admin_dashboard": "لوحة التحكم", + "connect": "تواصل", + "conversational": "ذكاء اصطناعي تفاعلي للجميع.", + "copied": "تم النسخ", + "dark_mode": "الوضع الليلي", + "dashboard_home": "الصفحة الرئيسية للوحة المعلومات", + "dashboard": "لوحة المعلومات", "delete": "حذف", "discord": "ديسكورد", - "docs": "وثائق", + "docs": "التوثيق", "github": "جيت هوب (github)", + "leaderboard": "جدول المتصدرين", "legal": "قانوني", - "light_mode": "الوضع المضيء", + "light_mode": "الوضع النهاري", "loading": "جار التحميل...", - "more_information": "مزيد من المعلومات", + "messages_dashboard": "لوحة عرض الرسائل", + "messages": "الرسائل", + "more_information": "المزيد من المعلومات", "no": "لا", "privacy_policy": "سياسة الخصوصية", "report_a_bug": "إبلاغ عن خطأ", "sign_in": "تسجيل الدخول", "sign_out": "تسجيل الخروج", - "success": "Success", + "status_dashboard": "لوحة 
عرض الحالة", + "status": "الحالة", + "success": "نجاح", "terms_of_service": "شروط الخدمة", - "title": "Open Assistant (المساعد المفتوح)", + "title": "Open Assistant (المساعد مفتوح المصدر)", + "user_leaderboard": "جدول المتصدرين من المستخدمين", + "users_dashboard": "لوحة عرض المستخدمين", + "users": "المستخدمين", "yes": "نعم" } diff --git a/website/public/locales/ar/dashboard.json b/website/public/locales/ar/dashboard.json index d87be0d1..faeeb42c 100644 --- a/website/public/locales/ar/dashboard.json +++ b/website/public/locales/ar/dashboard.json @@ -1,8 +1,8 @@ { - "create": "خلق", - "dashboard": "لوحة التحكم", + "create": "انشاء", + "dashboard": "لوحة المعلومات", "evaluate": "تقييم", - "go": "ذهاب", - "grab_a_task": "التقاط مهمة!", - "label": "تصنيف" + "go": "ذهاب الى", + "grab_a_task": "ابدأ مهمة!", + "label": "وسم" } diff --git a/website/public/locales/ar/index.json b/website/public/locales/ar/index.json index 01f1283d..ab687978 100644 --- a/website/public/locales/ar/index.json +++ b/website/public/locales/ar/index.json @@ -1,23 +1,23 @@ { - "blurb": "نحن نعتقد أنه يمكن أن نخلق ثورة.", - "blurb1": "كما ساهم Stable Diffusion في تحويل عالم الصناعة الفنية والبصرية بتقديم طرق جديدة، نحرص على تحسين العالم من خلال تقديم ذكاء تحدثي عالي الجودة.", - "description": " هدفنا انشاء ذكاء تحدثي عالي الجودة للجميع. لتحقيق هذا الهدف انشانا هذا المشروع مفتوح المصدر للدردشة الاسطناعية تزعمه ليون LAION باهانة مساهمين من كل أنحاء العالم. 
", + "blurb": "نؤمن انه بامكاننا ان نصنع ثورة", + "blurb1": "كما ساعد ستايبل ديفيوجن العالم في انشاء فنوف وصور بطرق جديدة, نريد ان نطوره من خلال الذكاء التفاعلي المذهل", + "description": "الذكاء الاصطناعي التفاعلي للجميع , مشروع مفتوح المصدر تم انشائه بواسطة LAION واخرين من جميع انحاء العالم من اجل بناء دردشة معتمدة على النموذج اللغوي المشهور GPT", "faq_items": { - "q0": "إلي أي مدى وصل هذا المشروع؟", - "a0": "نحن في المراحل الأولى من التطوير، نعمل على أساس أبحاث مؤرخة في تطبيق RLHF على النماذج اللغوية الكبيرة.", - "q1": "من وراء Open Assistant؟", - "a1": "Open Assistant هو مشروع منظم من قبل LAION وأفراد من حول العالم يهتمون بجلب هذه التكنولوجيا للجميع.", - "q2": "ما هي الترخيص الذي يستخدمه Open Assistant؟", - "a2": "يتم ترخيص الشفرة والنماذج بموجب ترخيص Apache 2.0.", - "q3": "هل سيتم إصدار بيانات التدريب أيضًا؟", + "q0": "ما هو تقدم المشروع حتى الان؟", + "a0": "نحن في المراحل الأولى من التطوير , نعمل بدأ من الأبحاث المؤكدة في تطبيق RLHFعلى موديلات اللغة الكبيرة ", + "q1": "من الذين يعملون على المساعد مفتوح المصدر", + "a1": "المساعد المفتوح المصدر هو مشروع منظم من قبل LAION وافراد من حول العالم مهتمين بتوفير هذه التقنية للجميع", + "q2": "؟ما هو الترخيص الذي يستخدمه المساعد الفتوح", + "a2": "تم ترخيص الكود والموديلات تحت ترخيص Apache 2.0", + "q3": "هل سيتم نشر بيانات التدريب أيضًا؟", "a3": "نعم، بموجب ترخيص CC BY 4.0.", - "q4": "هل سيكون Open Assistant مجانيًا؟", - "a4": "نعم، سيكون Open Assistant مجانيًا للاستخدام والتعديل.", - "q5": "ما هو الجهاز المطلوب لتشغيل النماذج؟", + "q4": "هل سيكون المساعد المفتوح مجانيًا؟", + "a4": "نعم، سيكون المساعد المفتوح مجانيً للاستخدام والتعديل.", + "q5": "ما هو العتاد المطلوب لتشغيل الموديلات؟", "a5": "سيكون هناك إصدارات يمكن تشغيلها على الأجهزة المصممة للمستهلكين." }, "faq_title": "أسئلة وأجوبة شائعة", "join_us_description": "جميع المشاريع المفتوحة المصدر تبدأ بأشخاص مثلك. المصدر المفتوح هو الإيمان بأنه إذا تعاوننا، يمكن أن نقدم معا علمنا وتكنولوجيتنا لنفع البشرية. 
هل تريد الانضمام الينا؟ اعثر علينا هنا:", "join_us_title": "انضم إلينا", - "subtitle": "ذكاء تحدثي للجميع." + "subtitle": "ذكاء اصطناعي تفاعلي للجميع." } diff --git a/website/public/locales/ar/labelling.json b/website/public/locales/ar/labelling.json index 34791b2b..c35e2499 100644 --- a/website/public/locales/ar/labelling.json +++ b/website/public/locales/ar/labelling.json @@ -1,24 +1,24 @@ { - "label_highlighted_yes_no_instruction": "أجب على السؤال (الأسئلة) التالي عن الخطاب المميز:", - "label_highlighted_flag_instruction": "حدد أي خيار ينطبق على الخطاب المميز:", - "label_highlighted_likert_instruction": "تقييم الخطاب المميز:", - "label_message_yes_no_instruction": "أجب على السؤال (الأسئلة) التالي عن الخطاب:", - "label_message_flag_instruction": "حدد أي خيار ينطبق على الخطاب:", - "label_message_likert_instruction": "تقييم الخطاب:", - "spam.question": "هل الخطاب هو رسالة غير مرغوب فيها(SPAM)؟", - "fails_task.question": "هل هو رد سيئ، كجواب على النداء؟", - "hate_speech": "نشاط عدائي", - "hate_speech.explanation": "المحتوى يحمل عبارات تشهير أو تهديد ويعبر عن الطائفية ضد خاصية محمية. الطائفية تعني الآراء المسبقة التي لا تعتمد على العقل. الخصائص المحمية تشمل الجنس والعرق والدين والميول الجنسية ومثل هذه الخصائص.", + "label_highlighted_yes_no_instruction": "أجب على السؤال (الأسئلة) التالية حول الرسالة المحددة:", + "label_highlighted_flag_instruction": " حدد أي من الخيارات التالية ينطبق على الرسالة المحددة:", + "label_highlighted_likert_instruction": "قيم الرسالة المحددة:", + "label_message_yes_no_instruction": "أجب على السؤال (الأسئلة) التالية حول الرسالة:", + "label_message_flag_instruction": "حدد أي من الخيارات التالية ينطبق على الرسالة:", + "label_message_likert_instruction": "قيم الرسالة:", + "spam.question": "هل الرسالة غير مرغوب فيها؟ (spam)", + "fails_task.question": "هل الرسالة تمثل رد سيئ، كجواب على التساؤل", + "hate_speech": "خطاب كراهية", + "hate_speech.explanation": "المحتوى مسيء أو مهدد ويعبر عن التحيز ضد خاصية محمية. 
يشير التحيز إلى آراء مسبقة لا تستند إلى سبب. تشمل الخصائص المحمية الجنس ، الجنسية، الدين ، التوجه الجنسي ، او الخصائص المشابهة.", "lang_mismatch": "لغة خاطئة", - "lang_mismatch.explanation": "لم كتب باللغة المحددة حاليا.", - "moral_judgement": "حكم على الأخلاقيات", - "moral_judgement.explanation": "يعبر عن الأخلاقيات.", - "not_appropriate": "غير مناسب", - "not_appropriate.explanation": "غير مناسب لمساعد الحريف.", - "pii": "تحتوي على PII", - "pii.explanation": "تحتوي على معلومات شخصية يمكن تحديد الهوية بها. مثال يشمل تفاصيل اتصال شخصية، رقم ترخيص وغيرها من أرقام الهوية وتفاصيل الحساب المصرفي.", - "political_content": "سياسي", + "lang_mismatch.explanation": "لم يكتب باللغة المختارة حاليا.", + "moral_judgement": "يحكم على الأخلاقيات", + "moral_judgement.explanation": "يعبر عن الحكم على الأخلاقيات.", + "not_appropriate": "غير ملائم", + "not_appropriate.explanation": "غير ملائم لمساعد العميل.", + "pii": "تحتوي على معلومات شخصية", + "pii.explanation": "يحتوي بيانات شخصية , مثل بيانات التواصل الشخصية, الرخصة , او الأرقام التعريفية والبيانات البنكية", + "political_content": "محتوى سياسي", "political_content.explanation": "يعبر عن الآراء السياسية.", - "sexual_content": "المحتوى الجنسي", + "sexual_content": "محتوى جنسي", "sexual_content.explanation": "يحتوي على محتوى جنسي." 
} diff --git a/website/public/locales/ar/leaderboard.json b/website/public/locales/ar/leaderboard.json index ab37d1a8..fca6b370 100644 --- a/website/public/locales/ar/leaderboard.json +++ b/website/public/locales/ar/leaderboard.json @@ -1,13 +1,13 @@ { "daily": "يومياً", - "label": "العلامات", + "label": "التوسيمات", "last_updated_at": "آخر تحديث في: {{val, datetime}}", - "leaderboard": "الجدول الترتيبي", + "leaderboard": "جدول المتصدرين", "monthly": "شهرياً", "next": "التالي", "overall": "إجمالياً", "previous": "السابق", - "prompt": "المقترحات", + "prompt": "التساؤل", "rank": "الترتيب", "reply": "الردود", "score": "النقاط", diff --git a/website/public/locales/ar/message.json b/website/public/locales/ar/message.json index a007673e..ee22acbc 100644 --- a/website/public/locales/ar/message.json +++ b/website/public/locales/ar/message.json @@ -1,20 +1,20 @@ { - "copy_message_id": "Copy message ID", - "label_action": "تصنيف", - "label_title": "تصنيف", + "copy_message_id": "نسخ معرف الرسالة", + "label_action": "توسيم", + "label_title": "الوسم", "message": "رسالة", - "message_deleted": "Message deleted", + "message_deleted": "تم حذف الرسالة", "open_new_tab_action": "فتح في علامة تبويب جديدة", - "parent": "الأصل", + "parent": "الاب", "reactions": "الردود", "recent_messages": "أحدث الرسائل", "report_action": "تبليغ", - "report_placeholder": "لماذا يجب استعراض هذه الرسالة؟", + "report_placeholder": "لماذا يجب مراجعة هذه الرسالة؟", "report_title": "تبليغ", "send_report": "إرسال", - "stop_tree": "Stop tree", + "stop_tree": "اوقف الشجرة", "submit_labels": "إرسال", - "tree_stopped": "Tree stopped {{id}}", + "tree_stopped": "تم ايقاف الشجرة {{id}}", "view_user": "عرض المستخدم", - "your_recent_messages": "أحدث رسائلك" + "your_recent_messages": "رسائلك الاخيرة" } diff --git a/website/public/locales/ar/side_menu.json b/website/public/locales/ar/side_menu.json deleted file mode 100644 index 4f7edcae..00000000 --- a/website/public/locales/ar/side_menu.json +++ /dev/null @@ -1,12 +0,0 
@@ -{ - "dashboard": "لوحة التحكم", - "dashboard_home": "الصفحة الرئيسية للإحصائيات", - "leaderboard": "جدول الأوائل", - "messages": "رسائل", - "messages_dashboard": "لوحة تحكم الرسائل", - "status": "الحالة", - "status_dashboard": "لوحة تحكم الحالة", - "user_leaderboard": "جدول الأوائل للمستخدمين", - "users": "المستخدمون", - "users_dashboard": "لوحة تحكم المستخدمين" -} diff --git a/website/public/locales/ar/tasks.json b/website/public/locales/ar/tasks.json index fdfb2d86..cba248f6 100644 --- a/website/public/locales/ar/tasks.json +++ b/website/public/locales/ar/tasks.json @@ -1,82 +1,82 @@ { - "available_task_count": "{{count}} مهام متاحة", + "available_task_count": "{{count}} مهام متوفرة", "classify_assistant_reply": { "label": "تصنيف رد المساعد", - "desc": "توفير ملصقات للمنادي.", - "overview": "اقرأ المحادثة التالية وثم أجب عن السؤال حول آخر رد في المناقشة." + "desc": "قدم توسيمات لتساؤل", + "overview": "اقرأ المحادثة التالية ومن ثم أجب عن السؤال حول آخر رد في المحادثة." }, "classify_initial_prompt": { - "label": "صنف بداية النداء", - "desc": "أعط علامات للنداء", - "overview": "اقرأ النداء التالي وأجب عن السؤال عنه." + "label": "صنف التساؤل المبدئي", + "desc": "قدم توسيمات لتساؤل", + "overview": "اقرأ التساؤل التالي وأجب عن السؤال حوله." }, "classify_prompter_reply": { - "label": "تصنيف رد المنادي", - "desc": "توفير ملصقات للمنادي.", - "overview": "اقرأ المحادثة التالية وثم أجب عن السؤال حول آخر رد في المناقشة." + "label": "تصنيف رد المتسائل", + "desc": "قدم توسيمات لتساؤل", + "overview": "اقرأ المحادثة التالية ومن ثم أجب عن السؤال حول آخر رد في المحادثة." }, "create_initial_prompt": { - "label": "إنشاء النداء الأولي", - "desc": "أكتب الندائات الأولية لمساعدة Open Assistant على محاولة الرد على الرسائل المتنوعة.", - "overview": "أنشئ رسالة أولية لإرسالها للمساعد", - "instruction": "أعط االندائات الأولية", - "response_placeholder": "اكتب نداءك هنا..." 
+ "label": "إنشاء تساؤلات مبدئية", + "desc": "اكتب تساؤلات مبدئية لمساعدة المساعد مفتوح المصدر في المحاولة على الرد على أنواع مختلفة من الرسائل (التسجيل في السحب)", + "overview": "أنشئ رسالة مبدئية لإرسالها للمساعد", + "instruction": "اكتب التساؤلات المبدئية", + "response_placeholder": "اكتب تساؤلك هنا..." }, "default": { - "unchanged_title": "لا تغير", + "unchanged_title": "لا تغييرات", "unchanged_message": "هل أنت متأكد من أنك تريد المتابعة؟" }, "label_assistant_reply": { - "label": "تصنيف الرد عن طريق المساعد", - "desc": "تقديم تصنيفات للنداء.", - "overview": "بعد النقاش التالي، تقديم تصنيفات للنداء النهائي." + "label": "تصنيف رد المساعد", + "desc": "زود توسيمات لتساؤل", + "overview": "قم بتزويد توسيمات للتساؤل النهائي من المحادثة التالية " }, "label_initial_prompt": { - "label": "تصنيف النداء الأولي", - "desc": "توفير تصنيفات للنداء.", - "overview": "توفير تصنيفات للنداء التالي" + "label": "تصنيف التساؤل المبدئي", + "desc": "قم بتزويد توسيمات لتساؤل.", + "overview": "قم بتزويد توسيمات للتساؤل التالي" }, "label_prompter_reply": { - "label": "تصنيف الرد على النداء", - "desc": "أعط تصنيفات للنداء.", - "overview": "أعط تصنيفات للرد النهائي في المناقشة التالية." + "label": "تصنيف رد المتسائل", + "desc": "قم بتزويد توسيمات لتساؤل.", + "overview": "قم بتزويد توسيمات للتساؤل النهائي من المحادثة التالية " }, "random": { - "label": "أنا أشعر بالحظ", - "desc": "ساعدنا في تحسين Open Assistant ببدء مهمة عشوائية." 
+ "label": "ضربة حظ", + "desc": "ساعدنا في تحسين المساعد مفتوح المصدر من خلال بدء مهمة عشوائية" }, "rank_assistant_replies": { - "label": "تصنيف ردود المدراء", - "desc": "تصحيح ردود Open Assistant على أساس دقة وقابلية القراءة.", - "overview": "بعد الحصول على الردود التالية للمدراء، قم بترتيبها من أفضل إلى أسوأ، أفضل أولاً وأسوأ آخراً.", - "unchanged_title": "لم يتغير الترتيب", - "unchanged_message": " لم تغير ترتيب المحاور.هل أنت متأكدأنك تريد المواصلة؟" + "label": "رتب ردود المساعد", + "desc": "اعط درجة لردود المساعد مفتوح المصدر على أساس الدقة وقابلية القراءة.", + "overview": "قم بترتيب الردود التالية من المساعد من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا", + "unchanged_title": "لم يتم تغيير الترتيب", + "unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة" }, "rank_initial_prompts": { - "label": "تصنيف النداءات الأولية", - "desc": "تصحيح النداءات المعطاة من قبل Open Assistant على أساس الدقة والقابلية للقراءة.", - "overview": "بالنظر إلى النداءات الأولية التالية، ترتيبها من أفضل إلى أسوأ، أفضل أن يكون أولا، أسوأ أن يكون آخرا.", - "unchanged_title": "لم يتغير الترتيب", - "unchanged_message": "لم تغير ترتيب النداءات. هل أنت متأكد من أنك ترغب في الاستمرار؟" + "label": "رتب التساؤلات المبدئية", + "desc": "قم بترتيب التساؤلات المعطاة من قبل المساعد مفتوح المصدر على أساس الدقة والقابلية للقراءة.", + "overview": "قم بترتيب الردود المبدئية التالية من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا", + "unchanged_title": "لم يتم تغيير الترتيب", + "unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة" }, "rank_user_replies": { - "label": "تصنيف ردود المستخدم", - "desc": "مساعدة Open Assistant لتحسين ردوده على محادثات مع مستخدمين آخرين.", - "overview": "بعد الحصول على الردود التالية للمستخدم، قم بترتيبها من أفضل إلى أسوأ، أفضل أولاً وأسوأ آخراً.", - "unchanged_title": "لم يتغير الترتيب", - "unchanged_message": "لم تقم بتغيير ترتيب الردود. 
هل أنت متأكد من أن تود الاستمرار؟" + "label": "ترتيب ردود المستخدم", + "desc": "قم بمساعدة المساعد مفتوح المصدر لتحسين ردوده على المحادثات مع المستخدمين الآخرين.", + "overview": "قم بترتيب الردود التالية من المستخدمين من الافضل الى الاسوء , الافضل اولا والاسوء اخيرا", + "unchanged_title": "لم يتم تغيير الترتيب", + "unchanged_message": " لم تغير ترتيب التساؤلات.هل أنت متأكدأنك تريد المتابعة" }, "reply_as_assistant": { - "label": "كــــالمدراء", - "desc": "مساعدة Open Assistant لتحسين ردوده على محادثات مع مستخدمين آخرين.", - "overview": "بعد الحصول على المحادثة التالية، توفير رد كافي", + "label": "رد كالمساعد", + "desc": "قم بمساعدة المساعد مفتوح المصدر لتحسين ردوده على المحادثات مع المستخدمين الآخرين.", + "overview": "قم بتقديم رد مناسب للمحادثة التالية", "response_placeholder": "اكتب ردك هنا..." }, "reply_as_user": { "label": "الرد كمستخدم", - "desc": "تحدث مع Open Assistant وساعده في تحسين ردوده عند التفاعل معه.", - "overview": "بناءً على المحادثة التالية، توفر رد مناسب", - "instruction": "أعط رد المستخدم", + "desc": "تحدث مع المساعد مفتوح المصدر وساعده في تحسين ردوده من خلال التفاعل معه.", + "overview": "قم بتقديم رد مناسب للمحادثة التالية", + "instruction": "اكتب رد المستخدم", "response_placeholder": "اكتب ردك هنا..." 
} } diff --git a/website/public/locales/ar/tos.json b/website/public/locales/ar/tos.json index 12039a8a..f991b19f 100644 --- a/website/public/locales/ar/tos.json +++ b/website/public/locales/ar/tos.json @@ -1,6 +1,6 @@ { - "title": "شروط الخدمة ل Open Assistant (المساعد المفتوح)", - "content": "للاستمرار في استخدام Open Assistant (المساعد المفتوح)، يجب عليك قبول شروط الخدمة الخاصة بنا أولاً.", + "title": "شروط الخدمة ل Open Assistant (المساعد مفتوح المصدر)", + "content": "للاستمرار في استخدام Open Assistant (المساعد مفتوح المصدر)، يجب عليك قبول شروط الخدمة الخاصة بنا أولاً.", "accept": "قبول", "decline": "رفض" } diff --git a/website/public/locales/ca/common.json b/website/public/locales/ca/common.json index b08d65b2..475f0e5f 100644 --- a/website/public/locales/ca/common.json +++ b/website/public/locales/ca/common.json @@ -1,27 +1,36 @@ { - "about": "Sobre", + "about": "Quant a", "account_settings": "Compte", "admin_dashboard": "Panell d'administració", - "connect": "Connectar", - "conversational": "AI conversacional per a tothom.", + "connect": "Connecta", + "conversational": "IA conversacional per a tothom.", "copied": "Copiat", "dark_mode": "Mode fosc", + "dashboard_home": "Panell principal", "dashboard": "Panell principal", - "delete": "Esborrar", + "delete": "Suprimeix", "discord": "Discord", "docs": "Documentació", "github": "GitHub", + "leaderboard": "Classificacions", "legal": "Legal", "light_mode": "Mode clar", - "loading": "Carregant...", + "loading": "S'està carregant...", + "messages_dashboard": "Panell de missatges", + "messages": "Missatges", "more_information": "Més informació", "no": "No", - "privacy_policy": "Política de privacitat", + "privacy_policy": "Política de Privadesa", "report_a_bug": "Informar d'un error", - "sign_in": "Iniciar sessió", - "sign_out": "Tancar sessió", + "sign_in": "Inicia la sessió", + "sign_out": "Tanca la sessió", "success": "Èxit", - "terms_of_service": "Termes de servei", + "status_dashboard": "Panell d'estat", + 
"status": "Estat", + "terms_of_service": "Condicions del servei", "title": "Open Assistant", + "user_leaderboard": "Classificació d'usuaris", + "users_dashboard": "Panell d'usuaris", + "users": "Usuaris", "yes": "Sí" } diff --git a/website/public/locales/ca/dashboard.json b/website/public/locales/ca/dashboard.json index a67d0390..1e83745f 100644 --- a/website/public/locales/ca/dashboard.json +++ b/website/public/locales/ca/dashboard.json @@ -1,8 +1,8 @@ { - "create": "Crear", + "create": "Crea", "dashboard": "Panell principal", - "evaluate": "Avaluar", - "go": "Anar", + "evaluate": "Avalua", + "go": "Vés", "grab_a_task": "Pren una tasca!", - "label": "Etiquetar" + "label": "Etiqueta" } diff --git a/website/public/locales/ca/index.json b/website/public/locales/ca/index.json index deea65d7..c85d485d 100644 --- a/website/public/locales/ca/index.json +++ b/website/public/locales/ca/index.json @@ -1,15 +1,15 @@ { "blurb": "Creiem que podem crear una revolució.", "blurb1": "De la mateixa manera que Stable Diffusion va ajudar el món a crear art i imatges de noves maneres, volem millorar el món proporcionant una IA conversacional sorprenent", - "description": "IA conversacional per a tothom. Un projecte de codi obert per crear un GPT LLM preparat per xatejar administrat per LAION i col·laboradors de tot el món.", + "description": "IA conversacional per a tothom. 
Un projecte de codi obert per a crear un GPT LLM preparat per a xatejar, administrat per LAION i col·laboradors de tot el món.", "faq_items": { "q0": "Com està avançat el projecte?", - "a0": "Estem en les primeres etapes de desenvolupament, treballant a partir de la investigació establerta per aplicar RLHF (aprenentatge per reforç amb realimentació humana) a models de llenguatge de grans dimensions.", + "a0": "Estem en les primeres etapes de desenvolupament, treballant a partir de la investigació establerta per a aplicar RLHF (aprenentatge per reforç amb realimentació humana) a models de llenguatge de grans dimensions.", "q1": "Qui hi ha al darrere d'Open Assistant?", - "a1": "Open Assistant és un projecte organitzat per LAION i persones de tot el planeta interessades a apropar aquesta tecnologia a tothom." + "a1": "Open Assistant és un projecte organitzat per LAION i per persones de tot el planeta interessades a apropar aquesta tecnologia a tothom." }, "faq_title": "Preguntes freqüents", "join_us_description": "Tots els projectes de codi obert comencen amb persones com tu. El codi obert és la creença que si col·laborem plegats, podem regalar el nostre coneixement i tecnologia al món en benefici de la humanitat. T'hi apuntes? Troba'ns aquí:", "join_us_title": "Uneix-te a nosaltres", - "subtitle": "AI conversacional per a tothom." + "subtitle": "IA conversacional per a tothom." 
} diff --git a/website/public/locales/ca/side_menu.json b/website/public/locales/ca/side_menu.json deleted file mode 100644 index 84e079d7..00000000 --- a/website/public/locales/ca/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Panell principal", - "dashboard_home": "Panell principal", - "leaderboard": "Classificacions", - "messages": "Missatges", - "messages_dashboard": "Taulell de missatges", - "status": "Estat", - "status_dashboard": "Taulell d'estat", - "user_leaderboard": "Classificació d'usuaris", - "users": "Usuaris", - "users_dashboard": "Taulell d'usuaris" -} diff --git a/website/public/locales/da/common.json b/website/public/locales/da/common.json new file mode 100644 index 00000000..441e545c --- /dev/null +++ b/website/public/locales/da/common.json @@ -0,0 +1,38 @@ +{ + "about": "Om", + "account_settings": "Konto", + "admin_dashboard": "Administrator Dashboard", + "connect": "Forbind", + "conversational": "Samtalende AI for alle.", + "copied": "Kopieret", + "dark_mode": "Dark Mode", + "dashboard_home": "Dashboard hjem", + "dashboard": "Dashboard", + "delete": "Slet", + "discord": "Discord", + "docs": "Docs", + "github": "GitHub", + "leaderboard": "Leaderboard", + "legal": "Legal", + "light_mode": "Light Mode", + "loading": "Indlæser...", + "messages_dashboard": "Besked dashboard", + "messages": "Beskeder", + "more_information": "Mere information", + "no": "Nej", + "parameters": "Parametre", + "privacy_policy": "Privatlivspolitik", + "report_a_bug": "Andmeld en fejl", + "sign_in": "Logind", + "sign_out": "Logud", + "status": "Status", + "status_dashboard": "Status dashboard", + "success": "Success", + "terms_of_service": "Terms of Service", + "title": "Open Assistant", + "trollboard": "Trollboard", + "user_leaderboard": "Bruger leaderboard", + "users_dashboard": "Brugere dashboard", + "users": "Brugere", + "yes": "Ja" +} diff --git a/website/public/locales/da/dashboard.json b/website/public/locales/da/dashboard.json new file mode 100644 
index 00000000..79c790ef --- /dev/null +++ b/website/public/locales/da/dashboard.json @@ -0,0 +1,8 @@ +{ + "grab_a_task": "Tag en opgave!", + "create": "Lav", + "evaluate": "Evaluer", + "label": "Label", + "dashboard": "Dashboard", + "go": "Start" +} diff --git a/website/public/locales/da/index.json b/website/public/locales/da/index.json new file mode 100644 index 00000000..6d1f181a --- /dev/null +++ b/website/public/locales/da/index.json @@ -0,0 +1,23 @@ +{ + "blurb": "Vi tror på at vi kan skabe en revolution.", + "blurb1": "På samme måde som Stable Diffusion hjalp verden med at skabe kunst og billeder på nye måder, vi ønsker at forbedre verden ved at stille en fantastik samtalende AI til rådighed.", + "description": "Samtalende AI for enhver. Et open source project der vil skabe en GPT LMM af LAION og folk fra verden med mulighed for at chatte", + "faq_items": { + "q0": "Hvor langt er dette project nu?", + "a0": "Vi er i de tidlige stadier af udvikling, vi arbejder med at skabe forskning omkring hvordan man anvender RLHF på store sprogmodeller (LLM).", + "q1": "Hvem står bag Open Assistant?", + "a1": "Open Assistant er et projekt af LAION med individuelle folk fra hele verden interesseret i at bringe denne teknologi til enhver.", + "q2": "Hvilken licens bruger Open Assistant", + "a2": "Både kildetekst og modellen er licenseret under Apache 2.0 licensen.", + "q3": "Vil datasættet bruge til at træne AI'en også blive stillet til rådighed?", + "a3": "Ja, under CC BY 4.0.", + "q4": "Will Open Assistant være gratis?", + "a4": "Ja, Open Assistant bliver gratis, både at bruge og ændre.", + "q5": "Hvilken hardware bliver det nødvendigt at have for at køre modellen?", + "a5": "Der vil være modeller som kan gøres på consumer-hardware." + }, + "faq_title": "Frequently Asked Questions / Ofte stillede spørgsmål", + "join_us_description": "Alle open source projekter begynder med folk som dig. 
Open source er troen på at vi kan samarbejde om at donere viden og teknologi til verden til fordel for hele menneskeheden. Vil du være med? Find os her:", + "join_us_title": "Join us", + "subtitle": "Samtalende AI for enhver." +} diff --git a/website/public/locales/da/labelling.json b/website/public/locales/da/labelling.json new file mode 100644 index 00000000..2d2bff0e --- /dev/null +++ b/website/public/locales/da/labelling.json @@ -0,0 +1,24 @@ +{ + "fails_task.question": "Er det et dårligt svar som et svar på anmodningen?", + "hate_speech": "Hate Speech", + "hate_speech.explanation": "Indholdet er overgreb, truer eller udtrykker fordomme mod et beskyttet karakteristika. Fordomme henviser til forudindtagede holdninger som ikke har hold i virkeligheden. Beskyttede karakteristika inkluderer køn, etnicitet, religion og seksuel orientering og lignende karakteristika.", + "label_highlighted_flag_instruction": "Vælg de der gælder for den fremhævede besked:", + "label_highlighted_likert_instruction": "Graduer den fremhævede besked:", + "label_highlighted_yes_no_instruction": "Svar på følgende spørgsmål om den fremhævede besked:", + "label_message_flag_instruction": "Vælg de der gælder for beskeden:", + "label_message_likert_instruction": "Graduer beskeden:", + "label_message_yes_no_instruction": "Svar på følgende spørgsmål om beskeden:", + "lang_mismatch": "Ikke {{language}}", + "lang_mismatch.explanation": "Ikke skrevet på {{language}}.", + "moral_judgement": "Bedømmer moral", + "moral_judgement.explanation": "Udtrykker en moralsk bedømmelse.", + "not_appropriate": "Upassende", + "not_appropriate.explanation": "Upassende for en assistent.", + "pii": "Indeholder PII", + "pii.explanation": "Indeholder personlige henførbare information (Personally Identifying Information). Det kunne f.eks. 
være kontaktoplysninger, kørekort, CPR-nummer, bankoplysninger etc.", + "political_content": "Politisk", + "political_content.explanation": "Udtrykker politisk holdning.", + "sexual_content": "Seksuelt indhold", + "sexual_content.explanation": "Indeholder seksuelt indhold.", + "spam.question": "Er denne besked spam?" +} diff --git a/website/public/locales/da/leaderboard.json b/website/public/locales/da/leaderboard.json new file mode 100644 index 00000000..294a4adc --- /dev/null +++ b/website/public/locales/da/leaderboard.json @@ -0,0 +1,33 @@ +{ + "accepted": "↪ Accepteret", + "accepted_prompts": "Accepterede prompter", + "daily": "Daglig", + "day": "Dag", + "good_rankings": "Rangliste", + "label": "Labels", + "labels_full": "Labels (fulde)", + "labels_simple": "Labels (simple)", + "last_updated_at": "Sidst opdateret: {{val, datetime}}", + "leaderboard": "Leaderboard", + "month": "Måned", + "monthly": "Månedlig", + "next": "Næste", + "overall": "Overordnet", + "previous": "Forrige", + "prompt": "Prompter", + "rank": "Placering", + "rankings": "Placeringer", + "replies_assistant": "Svar som Assistant", + "replies_prompter": "Svar som Prompter", + "reply": "Svar", + "reply_ranked_1": "Svar på bedste placering", + "score": "Score", + "top_5_contributors_today": "Top 5 bidragsydere i dag", + "total": "Total", + "user": "Bruger", + "view_all": "Se alt", + "week": "Uge", + "weekly": "Ugentlig", + "your_account": "Din konto", + "your_stats": "Din statistik" +} diff --git a/website/public/locales/da/message.json b/website/public/locales/da/message.json new file mode 100644 index 00000000..864a5306 --- /dev/null +++ b/website/public/locales/da/message.json @@ -0,0 +1,21 @@ +{ + "copy_message_id": "Kopier besked ID", + "copy_message_link": "Kopier besked link", + "label_action": "Label", + "label_title": "Label", + "message_deleted": "Besked slettet", + "message": "Besked", + "open_new_tab_action": "Åben i en ny tab", + "parent": "Forælder", + "reactions": "Reaktioner", + 
"recent_messages": "Nylige beskeder", + "report_action": "Anmeld", + "report_placeholder": "Hvorfor skal denne besked vurderes?", + "report_title": "Anmeld", + "send_report": "Indsend", + "stop_tree": "Stop tree", + "submit_labels": "Indsend", + "tree_stopped": "Tree stopped {{id}}", + "view_user": "Se bruger", + "your_recent_messages": "Dine nye beskeder" +} diff --git a/website/public/locales/da/tasks.json b/website/public/locales/da/tasks.json new file mode 100644 index 00000000..56a5d2f7 --- /dev/null +++ b/website/public/locales/da/tasks.json @@ -0,0 +1,84 @@ +{ + "default": { + "unchanged_title": "Ingen ændringer", + "unchanged_message": "Er du sikker på at du vil fortsætte?" + }, + "random": { + "label": "Jeg føler mig heldig", + "desc": "Hjælp os med at forbedre Open Assistant ved at starte en tilfældig opgave." + }, + "create_initial_prompt": { + "label": "Lav første prompter", + "desc": "Skriv første prompter for at hjælpe Open Assistant med at forsøge at svare på diverse beskeder (Læg i lotteriet)", + "overview": "Skab en første besked til Open Assistant", + "instruction": "Giv den første prompt", + "response_placeholder": "Skriv din prompt her..." + }, + "reply_as_user": { + "label": "Svar som bruger", + "desc": "Chat med Open Assistant og hjælp med at forbedre dens respons når du interagerer med den.", + "overview": "Givet følgende samtale, angiv et passende svar", + "instruction": "Angiv brugerens svar", + "response_placeholder": "Skriv dit svar her..." + }, + "reply_as_assistant": { + "label": "Svar som assistent", + "desc": "Hjælp Open assistent forbedre dets svar til samtaler med andre.", + "overview": "Givet følgende samtale, angiv et passende svar", + "response_placeholder": "Skriv dit svar her..." 
+ }, + "rank_user_replies": { + "label": "Ranger bruger svar", + "desc": "Hjælp Open assistent forbedre dets svar til samtaler med andre.", + "overview": "Givet følgende samtale, sorter fra bedst til ringest, med bedst i toppen.", + "unchanged_title": "Rækkefølge uændret", + "unchanged_message": "Du har ikke ændret på prompternes rækkefølge. Er du sikker på at du vil fortsætte?" + }, + "rank_assistant_replies": { + "label": "Ranger assistent svar", + "desc": "Giv point til svar givet af Open Assistant baseret på deres præcision og læsbarhed.", + "overview": "Givet følgende svar, sorter fra bedst til ringest, med bedst i toppen.", + "unchanged_title": "Rækkefølge uændret", + "unchanged_message": "Du har ikke ændret på svarenes rækkefølge. Er du sikker på at du vil fortsætte?" + }, + "rank_initial_prompts": { + "label": "Ranger første prompter", + "desc": "Giv point til prompter givet af Open Assistant baseret på deres præcision og læsbarhed.", + "overview": "Givet følgende første prompter, sorter fra bedst til ringest, med bedst i toppen.", + "unchanged_title": "Rækkefølge uændret", + "unchanged_message": "Du har ikke ændret på prompternes rækkefølge. Er du sikker på at du vil fortsætte?" + }, + "label_initial_prompt": { + "label": "Label første prompt", + "desc": "Angiv labels for en prompt.", + "overview": "Angiv labels for den følgende prompt" + }, + "label_prompter_reply": { + "label": "Label prompter svar", + "desc": "Angiv labels for en prompt.", + "overview": "Givet følgende diskussion, angiv labels for den endelige prompt." + }, + "label_assistant_reply": { + "label": "Label assistent svar", + "desc": "Angiv labels for et svar.", + "overview": "Givet følgende diskussion, angiv labels for den endelige prompt." + }, + "classify_initial_prompt": { + "label": "Klassificer første prompt", + "desc": "Angiv labels for en prompt.", + "overview": "Givet følgende prompt svar da på spørgsmål omkring denne." 
+ }, + "classify_prompter_reply": { + "label": "Klassificer promptsvar", + "desc": "Angiv labels for en prompt.", + "overview": "Læs den følgende samtale og svar på spørgsmålet omkring det sidste svar i diskussionen." + }, + "classify_assistant_reply": { + "label": "Klassificer assistentens svar", + "desc": "Angiv labels for en prompt.", + "overview": "Læs følgende samtale og svar så på spørgsmål omkring det sidste svar i diskussionen." + }, + "available_task_count": "{{count}} tilgængelige opgaver", + "writing_wrong_langauge_a_b": "Det ser ud til at du skriver på {{detected_lang}} men det bliver lagt ind i {{submit_lang}}.", + "submitted_as": "Dette vil blive lagt ind i {{submit_lang}}" +} diff --git a/website/public/locales/da/tos.json b/website/public/locales/da/tos.json new file mode 100644 index 00000000..718233b1 --- /dev/null +++ b/website/public/locales/da/tos.json @@ -0,0 +1,6 @@ +{ + "title": "Servicebetingelser for Open Assistant", + "content": "For fortsat at bruge Open Assistant, skal du acceptere servicebetingelserne.", + "accept": "Accepter", + "decline": "Afslå" +} diff --git a/website/public/locales/de/common.json b/website/public/locales/de/common.json index d401f4cb..655745de 100644 --- a/website/public/locales/de/common.json +++ b/website/public/locales/de/common.json @@ -6,22 +6,31 @@ "conversational": "Konversations-KI für alle.", "copied": "Kopiert", "dark_mode": "Dunkler Modus", + "dashboard_home": "Dashboard Home", "dashboard": "Dashboard", "delete": "Löschen", "discord": "Discord", "docs": "Doku", "github": "GitHub", + "leaderboard": "Leaderboard", "legal": "Rechtliches", "light_mode": "Heller Modus", "loading": "Wird geladen...", + "messages_dashboard": "Messages Dashboard", + "messages": "Nachrichten", "more_information": "Weitere Informationen", "no": "Nein", "privacy_policy": "Datenschutz-Bestimmungen", "report_a_bug": "Einen Fehler melden", "sign_in": "Anmelden", "sign_out": "Abmelden", + "status_dashboard": "Status Dashboard", + 
"status": "Status", "success": "Erfolg", "terms_of_service": "Nutzungsbedingungen", "title": "Open Assistant", + "user_leaderboard": "User Leaderboard", + "users_dashboard": "Users Dashboard", + "users": "Users", "yes": "Ja" } diff --git a/website/public/locales/de/dashboard.json b/website/public/locales/de/dashboard.json index b418eaaa..66e86532 100644 --- a/website/public/locales/de/dashboard.json +++ b/website/public/locales/de/dashboard.json @@ -3,6 +3,6 @@ "dashboard": "Dashboard", "evaluate": "Auswerten", "go": "Los", - "grab_a_task": "Schnapp dir eine Aufgabe!", + "grab_a_task": "Schnappen Sie sich eine Aufgabe!", "label": "Label" } diff --git a/website/public/locales/de/labelling.json b/website/public/locales/de/labelling.json index 6569976c..19854d06 100644 --- a/website/public/locales/de/labelling.json +++ b/website/public/locales/de/labelling.json @@ -19,6 +19,6 @@ "political_content": "Politisch", "political_content.explanation": "Enthält politische Meinungen.", "sexual_content": "Sexueller Inhalt", - "sexual_content.explanation": "Contains sexual content.", + "sexual_content.explanation": "Enthält sexuelle Inhalte.", "spam.question": "Ist die Nachricht Spam?" 
} diff --git a/website/public/locales/de/message.json b/website/public/locales/de/message.json index 6a74a1c3..8e7aaafe 100644 --- a/website/public/locales/de/message.json +++ b/website/public/locales/de/message.json @@ -1,13 +1,13 @@ { - "copy_message_id": "Copy message ID", + "copy_message_id": "Message ID kopieren", "label_action": "Label", "label_title": "Label", "message": "Nachricht", - "message_deleted": "Message deleted", + "message_deleted": "Nachricht gelöscht", "open_new_tab_action": "In neuem Tab öffnen", "parent": "Vorgänger", "reactions": "Reaktionen", - "recent_messages": "Recent Messages", + "recent_messages": "Kürzliche Nachrichten", "report_action": "Melden", "report_placeholder": "Warum sollte diese Nachricht überprüft werden?", "report_title": "Meldung", @@ -15,6 +15,6 @@ "stop_tree": "Stop tree", "submit_labels": "Absenden", "tree_stopped": "Tree stopped {{id}}", - "view_user": "View user", - "your_recent_messages": "Your Recent Messages" + "view_user": "Benutzer anzeigen", + "your_recent_messages": "Ihre kürzlichen Nachrichten" } diff --git a/website/public/locales/de/side_menu.json b/website/public/locales/de/side_menu.json deleted file mode 100644 index c17b475a..00000000 --- a/website/public/locales/de/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Dashboard", - "dashboard_home": "Dashboard Home", - "leaderboard": "Leaderboard", - "messages": "Nachrichten", - "messages_dashboard": "Messages Dashboard", - "status": "Status", - "status_dashboard": "Status Dashboard", - "user_leaderboard": "User Leaderboard", - "users": "Users", - "users_dashboard": "Users Dashboard" -} diff --git a/website/public/locales/de/tasks.json b/website/public/locales/de/tasks.json index a3e25217..c5646666 100644 --- a/website/public/locales/de/tasks.json +++ b/website/public/locales/de/tasks.json @@ -1,5 +1,5 @@ { - "available_task_count": "{{count}} tasks available", + "available_task_count": "{{count}} Aufgaben verfügbar", 
"classify_assistant_reply": { "label": "Antwort des Assistenten klassifizieren", "desc": "Labeln Sie die Antwort.", diff --git a/website/public/locales/de/tos.json b/website/public/locales/de/tos.json index 4d3d62b4..d424f5b5 100644 --- a/website/public/locales/de/tos.json +++ b/website/public/locales/de/tos.json @@ -1,6 +1,6 @@ { - "accept": "Accept", - "content": "To continue using Open Assistant, you have to accept our Terms of Service first.", - "decline": "Decline", - "title": "Terms of Service for Open Assistant" + "accept": "Akzeptieren", + "content": "Um Open Assistant weiterhin nutzen zu können, müssen Sie zunächst unsere Nutzungsbedingungen akzeptieren.", + "decline": "Ablehnen", + "title": "Nutzungsbedingungen für Open Assistant" } diff --git a/website/public/locales/en/common.json b/website/public/locales/en/common.json index b964b26a..08ae1ee4 100644 --- a/website/public/locales/en/common.json +++ b/website/public/locales/en/common.json @@ -6,22 +6,33 @@ "conversational": "Conversational AI for everyone.", "copied": "Copied", "dark_mode": "Dark Mode", + "dashboard_home": "Dashboard Home", "dashboard": "Dashboard", "delete": "Delete", "discord": "Discord", "docs": "Docs", "github": "GitHub", + "leaderboard": "Leaderboard", "legal": "Legal", "light_mode": "Light Mode", "loading": "Loading...", + "messages_dashboard": "Messages Dashboard", + "messages": "Messages", "more_information": "More Information", "no": "No", + "parameters": "Parameters", "privacy_policy": "Privacy Policy", "report_a_bug": "Report a Bug", "sign_in": "Sign In", "sign_out": "Sign Out", + "status": "Status", + "status_dashboard": "Status Dashboard", "success": "Success", "terms_of_service": "Terms of Service", "title": "Open Assistant", + "trollboard": "Trollboard", + "user_leaderboard": "User Leaderboard", + "users_dashboard": "Users Dashboard", + "users": "Users", "yes": "Yes" } diff --git a/website/public/locales/en/message.json b/website/public/locales/en/message.json index 
a531f0a4..75565656 100644 --- a/website/public/locales/en/message.json +++ b/website/public/locales/en/message.json @@ -8,7 +8,7 @@ "open_new_tab_action": "Open in new tab", "parent": "Parent", "reactions": "Reactions", - "recent_messages": "Recent Messages", + "recent_messages": "Recent Messages in {{language}}", "report_action": "Report", "report_placeholder": "Why should this message be reviewed?", "report_title": "Report", diff --git a/website/public/locales/en/side_menu.json b/website/public/locales/en/side_menu.json deleted file mode 100644 index cbbb3fad..00000000 --- a/website/public/locales/en/side_menu.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "dashboard": "Dashboard", - "dashboard_home": "Dashboard Home", - "messages": "Messages", - "messages_dashboard": "Messages Dashboard", - "leaderboard": "Leaderboard", - "user_leaderboard": "User Leaderboard", - "users": "Users", - "users_dashboard": "Users Dashboard", - "status": "Status", - "status_dashboard": "Status Dashboard", - "trollboard": "Trollboard" -} diff --git a/website/public/locales/es/common.json b/website/public/locales/es/common.json index eac24039..4a663534 100644 --- a/website/public/locales/es/common.json +++ b/website/public/locales/es/common.json @@ -6,22 +6,33 @@ "conversational": "IA conversacional para todos.", "copied": "Copiado", "dark_mode": "Modo oscuro", + "dashboard_home": "Panel principal", "dashboard": "Panel principal", "delete": "Borrar", "discord": "Discord", "docs": "Documentación", "github": "GitHub", + "leaderboard": "Clasificaciones", "legal": "Legal", "light_mode": "Modo claro", "loading": "Cargando...", + "messages_dashboard": "Tablón de mensajes", + "messages": "Mensajes", "more_information": "Más información", "no": "No", + "parameters": "Parámetros", "privacy_policy": "Política de privacidad", "report_a_bug": "Informar de un error", "sign_in": "Iniciar sesión", "sign_out": "Cerrar sesión", + "status": "Estado", + "status_dashboard": "Tablón de estado", "success": 
"Éxito", "terms_of_service": "Términos de servicio", "title": "Open Assistant", + "trollboard": "Tablón de trolls", + "user_leaderboard": "Clasificación de usuarios", + "users_dashboard": "Tablón de usuarios", + "users": "Usuarios", "yes": "Sí" } diff --git a/website/public/locales/es/leaderboard.json b/website/public/locales/es/leaderboard.json index 1972e900..bb76060f 100644 --- a/website/public/locales/es/leaderboard.json +++ b/website/public/locales/es/leaderboard.json @@ -1,18 +1,33 @@ { + "accepted": "↪ Aceptadas", + "accepted_prompts": "Indicaciones aceptadas", "daily": "Diario", + "day": "Día", + "good_rankings": "Buenas clasificaciones", "label": "Etiquetas", + "labels_full": "Etiquetas (completas)", + "labels_simple": "Etiquetas (sencillas)", "last_updated_at": "Última actualización: {{val, datetime}}", "leaderboard": "Tabla de clasificación", + "month": "Mes", "monthly": "Mensual", "next": "Siguiente", "overall": "Global", "previous": "Anterior", "prompt": "Indicaciones", "rank": "Posición", + "rankings": "Ordenaciones", + "replies_assistant": "Respuestas como asistente", + "replies_prompter": "Respuestas como apuntador", "reply": "Respuestas", + "reply_ranked_1": "Respuestas clasificadas primeras", "score": "Puntuación", "top_5_contributors_today": "5 mayores contribuidores hoy", + "total": "Total", "user": "Usuario", "view_all": "Ver todos", - "weekly": "Semanal" + "week": "Semana", + "weekly": "Semanal", + "your_account": "Tu cuenta", + "your_stats": "Tus estadísticas" } diff --git a/website/public/locales/es/message.json b/website/public/locales/es/message.json index 081b81f4..b2b3f2f3 100644 --- a/website/public/locales/es/message.json +++ b/website/public/locales/es/message.json @@ -1,5 +1,6 @@ { "copy_message_id": "Copiar ID del mensaje", + "copy_message_link": "Copiar enlace al mensaje", "label_action": "Etiquetar", "label_title": "Etiqueta", "message": "Mensaje", diff --git a/website/public/locales/es/side_menu.json 
b/website/public/locales/es/side_menu.json deleted file mode 100644 index 080fb902..00000000 --- a/website/public/locales/es/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Panel principal", - "dashboard_home": "Panel principal", - "leaderboard": "Clasificaciones", - "messages": "Mensajes", - "messages_dashboard": "Tablón de mensajes", - "status": "Estado", - "status_dashboard": "Tablón de estado", - "user_leaderboard": "Clasificación de usuarios", - "users": "Usuarios", - "users_dashboard": "Tablón de usuarios" -} diff --git a/website/public/locales/es/tasks.json b/website/public/locales/es/tasks.json index 1c6a4cc5..da761255 100644 --- a/website/public/locales/es/tasks.json +++ b/website/public/locales/es/tasks.json @@ -78,5 +78,7 @@ "overview": "Dada la conversación siguiente, proporciona una respuesta adecuada", "instruction": "Proporciona la respuesta del usuario", "response_placeholder": "Escribe tu respuesta aquí..." - } + }, + "writing_wrong_langauge_a_b": "Parece que estás escribiendo en {{detected_lang}} pero esto se enviará como {{submit_lang}}.", + "submitted_as": "Esto será enviado como {{submit_lang}}" } diff --git a/website/public/locales/es/tos.json b/website/public/locales/es/tos.json index 4d3d62b4..e02420a4 100644 --- a/website/public/locales/es/tos.json +++ b/website/public/locales/es/tos.json @@ -1,6 +1,6 @@ { - "accept": "Accept", - "content": "To continue using Open Assistant, you have to accept our Terms of Service first.", - "decline": "Decline", - "title": "Terms of Service for Open Assistant" + "accept": "Aceptar", + "content": "Para continuar usando Open Assistant, tienes que aceptar nuestros Términos de Servicio primero.", + "decline": "Rechazar", + "title": "Términos de Servicio para Open Assistant" } diff --git a/website/public/locales/eu/common.json b/website/public/locales/eu/common.json new file mode 100644 index 00000000..1410ed74 --- /dev/null +++ b/website/public/locales/eu/common.json @@ -0,0 +1,38 @@ +{ + 
"about": "Honi buruz", + "account_settings": "Kontua", + "admin_dashboard": "Administrazio panela", + "connect": "Konektatu", + "conversational": "Elkarrizketarako AA guztientzat.", + "copied": "Kopiatua", + "dark_mode": "Ilun modua", + "dashboard": "Aginte-panela", + "dashboard_home": "Aginte-panelaren hasiera", + "delete": "Ezabatu", + "discord": "Discord", + "docs": "Dokumentazioa", + "github": "GitHub", + "leaderboard": "Sailkapena", + "legal": "Legea", + "light_mode": "Argi modua", + "loading": "Kargatzen...", + "messages": "Mezuak", + "messages_dashboard": "Mezuen panela", + "more_information": "Informazio gehiago", + "no": "Ez", + "parameters": "Parametroak", + "privacy_policy": "Pribatutasun politika", + "report_a_bug": "Errorea jakinarazi", + "sign_in": "Hasi saioa", + "sign_out": "Amaitu saioa", + "status": "Egoera", + "status_dashboard": "Egoera panela", + "success": "Arrakasta", + "terms_of_service": "Zerbitzuaren baldintzak", + "title": "Open Assistant", + "trollboard": "Troll sailkapena", + "user_leaderboard": "Erabiltzaileen sailkapena", + "users": "Erabiltzaileak", + "users_dashboard": "Erabiltzaileen panela", + "yes": "Bai" +} diff --git a/website/public/locales/eu/dashboard.json b/website/public/locales/eu/dashboard.json new file mode 100644 index 00000000..fd5d377b --- /dev/null +++ b/website/public/locales/eu/dashboard.json @@ -0,0 +1,8 @@ +{ + "create": "Sortu", + "dashboard": "Aginte-panela", + "evaluate": "Ebaluatu", + "go": "Joan", + "grab_a_task": "Hartu zeregin bat!", + "label": "Etiketa" +} diff --git a/website/public/locales/eu/index.json b/website/public/locales/eu/index.json new file mode 100644 index 00000000..4a2ef104 --- /dev/null +++ b/website/public/locales/eu/index.json @@ -0,0 +1,23 @@ +{ + "blurb": "Uste dugu iraultza bat sortu dezakegula.", + "blurb1": "Stable Diffusion-ek munduari artea eta irudiak modu berrietan egiten lagundu zion modu berean, mundua hobetu nahi dugu elkarrizketarako AA harrigarria eskainiz.", + 
"description": "Elkarrizketarako AA guztientzat. Iturburu irekiko proiektua GPT LLM txat bat sortzeko, LAIONek eta mundu osoko laguntzaileek zuzendua", + "faq_items": { + "q0": "Noraino doa proiektu hau?", + "a0": "Garapenaren hasierako faseetan gaude, RLHF hizkuntza-eredu handiei aplikatzeko ezarritako ikerketatik lanean.", + "q1": "Nor dago Open Assistant-en atzean?", + "a1": "Open Assistant LAIONek eta mundu osoko teknologia hori guztiongana heltzeko interesa duten pertsonek antolatutako proiektua da.", + "q2": "Zer lizentzia erabiltzen du Open Assistant-ek?", + "a2": "Kodea eta ereduak Apache 2.0 lizentziapean daude lizentziatuta.", + "q3": "Entrenamendurako datuak ere kaleratuko al dira?", + "a3": "Bai, CC BY 4.0 lizentziarekin.", + "q4": "Open Assistant doakoa izango al da?", + "a4": "Bai, Open Assistant librea izango da erabiltzeko eta aldatzeko.", + "q5": "Zer hardware beharko da ereduak exekutatzeko?", + "a5": "Kontsumitzaileen hardwarean exekutatu ahal izango diren bertsioak egongo dira." + }, + "faq_title": "Ohiko galderak", + "join_us_description": "Kode irekiko proiektu guztiak zu bezalako jendearekin hasten dira. Kode irekia elkarlanean aritzen bagara elkarrekin gure ezagutzak eta teknologiak gizateriaren mesederako munduari opari ditzakegula ustea da. Barruan al zaude? Aurki gaitzazu hemen:", + "join_us_title": "Bat egin gurekin", + "subtitle": "Elkarrizketarako AA guztientzat." +} diff --git a/website/public/locales/eu/labelling.json b/website/public/locales/eu/labelling.json new file mode 100644 index 00000000..05b691d2 --- /dev/null +++ b/website/public/locales/eu/labelling.json @@ -0,0 +1,24 @@ +{ + "fails_task.question": "Erantzun txarra al da, sarrerako zereginaren erantzun gisa?", + "hate_speech": "Gorroto hizkera", + "hate_speech.explanation": "Edukia abusua edo mehatxagarria da eta babestutako ezaugarri baten aurkako aurreiritziak adierazten ditu. Aurreiritziak arrazonaketan oinarritzen ez diren aurreko ikuspegiak dira. 
Babestutako ezaugarrien artean generoa, etnia, erlijioa, sexu-orientazioa eta antzeko ezaugarriak daude.", + "label_highlighted_flag_instruction": "Hautatu nabarmendutako mezuari dagokion edozein:", + "label_highlighted_likert_instruction": "Baloratu nabarmendutako mezua:", + "label_highlighted_yes_no_instruction": "Erantzun nabarmendutako mezuari buruzko galdera hau(ek):", + "label_message_flag_instruction": "Hautatu mezuari dagokion edozein:", + "label_message_likert_instruction": "Baloratu mezua:", + "label_message_yes_no_instruction": "Erantzun mezuari buruzko galdera hau(ek):", + "lang_mismatch": "Hizkuntza okerra", + "lang_mismatch.explanation": "Ez dago une honetan hautatutako hizkuntzan idatzita.", + "moral_judgement": "Moraltasuna epaitzen du", + "moral_judgement.explanation": "Epaiketa morala adierazten du.", + "not_appropriate": "Desegokia", + "not_appropriate.explanation": "Desegokia bezero laguntzaile batentzat.", + "pii": "PII dauka", + "pii.explanation": "Pertsonalki identifikatzeko informazioa dauka. Adibidez, harremanetarako datu pertsonalak, lizentzia eta beste identitate-zenbaki batzuk eta banku-datuak daude.", + "political_content": "Politikoa", + "political_content.explanation": "Iritzi politikoak adierazten ditu.", + "sexual_content": "Eduki sexuala", + "sexual_content.explanation": "Eduki sexuala dauka.", + "spam.question": "Mezua spama al da?" 
+} diff --git a/website/public/locales/eu/leaderboard.json b/website/public/locales/eu/leaderboard.json new file mode 100644 index 00000000..d9a07adb --- /dev/null +++ b/website/public/locales/eu/leaderboard.json @@ -0,0 +1,33 @@ +{ + "accepted": "↪ Onartua", + "accepted_prompts": "Onartutako instrukzioak", + "daily": "Egunekoa", + "day": "Eguna", + "good_rankings": "Sailkapen onak", + "label": "Etiketak", + "labels_full": "Etiketak (osoa)", + "labels_simple": "Etiketak (sinplea)", + "last_updated_at": "Azken eguneratzea: {{val, datetime}}", + "leaderboard": "Sailkapena", + "month": "Hilabetea", + "monthly": "Hilekoa", + "next": "Hurrengoa", + "overall": "Orokorra", + "previous": "Aurrekoa", + "prompt": "Eskaerak", + "rank": "Postua", + "rankings": "Sailkapenak", + "replies_assistant": "Erantzunak asistente bezala", + "replies_prompter": "Erantzunak erabiltzaile bezala", + "reply": "Erantzunak", + "reply_ranked_1": "Lehen postuan sailkatutako erantzunak", + "score": "Puntuazioa", + "top_5_contributors_today": "Gaurko 5 laguntzaile nagusiak", + "total": "Guztira", + "user": "Erabiltzailea", + "view_all": "Ikusi guztiak", + "week": "Astea", + "weekly": "Astekoa", + "your_account": "Zure kontua", + "your_stats": "Zure estatistikak" +} diff --git a/website/public/locales/eu/message.json b/website/public/locales/eu/message.json new file mode 100644 index 00000000..aef68076 --- /dev/null +++ b/website/public/locales/eu/message.json @@ -0,0 +1,21 @@ +{ + "copy_message_id": "Kopiatu mezuaren IDa", + "copy_message_link": "Kopiatu mezuaren esteka", + "label_action": "Etiketatu", + "label_title": "Etiketa", + "message": "Mezua", + "message_deleted": "Mezua ezabatu da", + "open_new_tab_action": "Ireki orri berri batean", + "parent": "Guraso", + "reactions": "Erreakzioak", + "recent_messages": "Azken mezuak", + "report_action": "Salatu", + "report_placeholder": "Zergatik berrikusi behar da mezu hau?", + "report_title": "Txostena", + "send_report": "Bidali", + "stop_tree": 
"Gelditu zuhaitza", + "submit_labels": "Bidali", + "tree_stopped": "Zuhaitza gelditu da {{id}}", + "view_user": "Ikusi erabiltzailea", + "your_recent_messages": "Zure azken mezuak" +} diff --git a/website/public/locales/eu/tasks.json b/website/public/locales/eu/tasks.json new file mode 100644 index 00000000..aa5ea58d --- /dev/null +++ b/website/public/locales/eu/tasks.json @@ -0,0 +1,84 @@ +{ + "available_task_count": "{{count}} zeregin eskuragarri", + "classify_assistant_reply": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Sailifikatu Laguntzailea eginbidearen erantzuna", + "overview": "Irakurri hurrengo elkarrizketa eta erantzun elkarrizketako azken erantzunari buruzko galdera." + }, + "classify_initial_prompt": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Salifikatu hasierako instrukzioa", + "overview": "Irakurri hurrengo instrukzioa eta erantzun horri buruzko galdera." + }, + "classify_prompter_reply": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Sailifikatu galdetzailearen erantzuna", + "overview": "Irakurri hurrengo elkarrizketa eta erantzun elkarrizketako azken erantzunari buruzko galdera." + }, + "create_initial_prompt": { + "desc": "Idatzi hasierako instrukzioak Open Assistant-i era askotako mezuak erantzuten saia dadin. (sartu loterian)", + "instruction": "Eman hasierako instrukzioak", + "label": "Sortu hasierako instrukzioak", + "overview": "Sortu hasierako mezu bat asistenteari bidaltzeko", + "response_placeholder": "Idatzi zure instrukzioak hemen..." + }, + "default": { + "unchanged_message": "Ziur al zaude jarraitu nahi duzula?", + "unchanged_title": "Ez dago aldaketarik" + }, + "label_assistant_reply": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Etiketatu asistentearen erantzuna", + "overview": "Ondoko elkarrizketa ikusita, eman etiketak hurrengo instrukzioari." 
+ }, + "label_initial_prompt": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Etiketatu hasierako instrukzioak", + "overview": "Jarri etiketak hurrengo instrukzioari" + }, + "label_prompter_reply": { + "desc": "Jarri etiketak instrukzio bati.", + "label": "Etiketatu galdetzailearen erantzuna", + "overview": "Ondoko elkarrizketa ikusita, eman etiketak hurrengo instrukzioari." + }, + "random": { + "desc": "Lagun iezaguzu Open Assistant hobetzen ausazko zeregin bat hasiz.", + "label": "Zorionekoa sentitzen naiz" + }, + "rank_assistant_replies": { + "desc": "Open Assistant-ek emandako erantzunak sailkatu zehaztasun eta irakurgarritasunaren arabera.", + "label": "Asistentearen erantzunak sailkatu", + "overview": "Ondoko asistentearen erantzunak ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena.", + "unchanged_message": "Ez duzu galderen ordena aldatu. Ziur al zaude jarraitu nahi duzula?", + "unchanged_title": "Ordena aldatu gabe" + }, + "rank_initial_prompts": { + "desc": "Open Assistant-ek emandako erantzunak sailkatu zehaztasun eta irakurgarritasunaren arabera.", + "label": "Hasierako instrukzioak sailkatu", + "overview": "Hasierako eskaera hauek ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena izanik.", + "unchanged_message": "Ez duzu galderen ordena aldatu. Ziur al zaude jarraitu nahi duzula?", + "unchanged_title": "Ordena aldatu gabe" + }, + "rank_user_replies": { + "desc": "Lagundu Open Assistant-ek beste erabiltzaile batzuekin dituen elkarrizketetako erantzunak hobetzen.", + "label": "Erabiltzaileen erantzunak sailkatu", + "overview": "Ondoko erabiltzaileen erantzunak ikusita, ordenatu itzazu onenetik txarrenera, onena lehena izanik, txarrena azkena izatea.", + "unchanged_message": "Ez duzu galderen ordena aldatu. 
Ziur al zaude jarraitu nahi duzula?", + "unchanged_title": "Ordena aldatu gabe" + }, + "reply_as_assistant": { + "desc": "Lagundu Open Assistant-ek beste erabiltzaile batzuekin dituen elkarrizketetako erantzunak hobetzen.", + "label": "Erantzun asistente gisa", + "overview": "Ondoko elkarrizketa ikusita, eman erantzun egokia", + "response_placeholder": "Idatzi zure erantzuna hemen..." + }, + "reply_as_user": { + "desc": "Txateatu Open Assistant-ekin eta lagundu bere erantzunak hobetzen harekin elkarreraginean.", + "instruction": "Eman erabiltzailearen erantzuna", + "label": "Erantzun erabiltzaile gisa", + "overview": "Ondoko elkarrizketa ikusita, eman erantzun egokia", + "response_placeholder": "Idatzi zure erantzuna hemen..." + }, + "submitted_as": "Hau {{submit_lang}} hizkuntzan bidaliko da ", + "writing_wrong_langauge_a_b": "Ematen du {{detected_lang}} hizkuntzan baina hau {{submit_lang}} hizkuntzan bidaliko da." +} diff --git a/website/public/locales/eu/tos.json b/website/public/locales/eu/tos.json new file mode 100644 index 00000000..2d6485f5 --- /dev/null +++ b/website/public/locales/eu/tos.json @@ -0,0 +1,6 @@ +{ + "accept": "Onartu", + "content": "Open Assistant erabiltzen jarraitzeko, gure zerbitzu-baldintzak onartu behar dituzu lehenik.", + "decline": "Ukatu", + "title": "Open Assistant-en zerbitzu-baldintzak" +} diff --git a/website/public/locales/fa/common.json b/website/public/locales/fa/common.json new file mode 100644 index 00000000..4e1f74ca --- /dev/null +++ b/website/public/locales/fa/common.json @@ -0,0 +1,38 @@ +{ + "about": "درباره", + "account_settings": "حساب کاربری", + "admin_dashboard": "داشبورد مدیر", + "connect": "اتصال", + "conversational": "هوش مصنوعی مکالمه برای همه.", + "copied": "کپی شد", + "dark_mode": "حالت تاریک", + "dashboard_home": "خانه داشبورد", + "dashboard": "داشبورد", + "delete": "حذف", + "discord": "دیسکورد", + "docs": "مستندات", + "github": "گیت‌هاب", + "leaderboard": "رده بندی", + "legal": "قانونی", + "light_mode": "حالت 
روشن", + "loading": "در حال بارگذاری...", + "messages_dashboard": "داشبورد پیام‌ها", + "messages": "پیام‌ها", + "more_information": "اطلاعات بیشتر", + "no": "نه", + "parameters": "مولفه ها", + "privacy_policy": "حریم خصوصی", + "report_a_bug": "گزارش اشکال", + "sign_in": "ورود", + "sign_out": "خروج", + "status": "وضعیت", + "status_dashboard": "داشبورد وضعیت", + "success": "موفقیت", + "terms_of_service": "شرایط استفاده", + "title": "دستیار باز", + "trollboard": "Trollboard", + "user_leaderboard": "رده بندی کاربر", + "users_dashboard": "داشبورد کاربران", + "users": "کاربر", + "yes": "بله" +} diff --git a/website/public/locales/fa/dashboard.json b/website/public/locales/fa/dashboard.json new file mode 100644 index 00000000..8a76bc73 --- /dev/null +++ b/website/public/locales/fa/dashboard.json @@ -0,0 +1,8 @@ +{ + "grab_a_task": "انجام یک کار!", + "create": "ساخت", + "evaluate": "ارزیابی", + "label": "برچسب", + "dashboard": "داشبورد", + "go": "برو" +} diff --git a/website/public/locales/fa/index.json b/website/public/locales/fa/index.json new file mode 100644 index 00000000..95ca824d --- /dev/null +++ b/website/public/locales/fa/index.json @@ -0,0 +1,23 @@ +{ + "blurb": "ما معتقدیم که می توانیم انقلابی ایجاد کنیم.", + "blurb1": "همانطور که Stable Diffusion به جهان کمک کرد تا هنر و تصاویر را به روش های جدید بسازد، ما می خواهیم با ارائه هوش مصنوعی محاوره ای شگفت انگیز جهان را بهبود بخشیم.", + "description": "هوش مصنوعی مکالمه ای برای همه یک پروژه منبع باز برای ایجاد یک گپ فعال GPT LLM که توسط LAION و مشارکت کنندگان در سراسر جهان اجرا می شود.", + "faq_items": { + "q0": "این پروژه چقدر طول می کشد?", + "a0": "ما در مراحل اولیه توسعه هستیم و از تحقیقات تثبیت شده در استفاده از RLHF به مدل های زبانی بزرگ کار می کنیم.", + "q1": "چه کسی پشت دستیار باز است?", + "a1": "دستیار باز پروژه ای است که توسط LAION و افرادی در سراسر جهان که علاقه مند به ارائه این فناوری به همه هستند سازماندهی شده است.", + "q2": "دستیار باز از چه مجوزی استفاده می کند?", + "a2": "کد و مدل ها تحت مجوز Apache 
2.0 مجوز هستند.", + "q3": "آیا داده های آموزشی نیز منتشر خواهد شد?", + "a3": "بله, تحت CC BY 4.0.", + "q4": "دستیار باز رایگان خواهد بود?", + "a4": "بله، دستیار باز برای استفاده و اصلاح رایگان خواهد بود.", + "q5": "چه سخت افزاری برای اجرای مدل ها مورد نیاز خواهد بود?", + "a5": "نسخه هایی وجود خواهد داشت که روی سخت افزار همه قابل اجرا خواهند بود." + }, + "faq_title": "سوالات متداول", + "join_us_description": "همه پروژه های متن باز با افرادی مانند شما شروع می شوند. منبع باز این باور است که اگر ما همکاری کنیم، می توانیم با هم دانش و فناوری خود را به نفع بشریت به جهان هدیه کنیم. شما داخل هستید؟ ما را اینجا پیدا کنید:", + "join_us_title": "به ما بپیوندید", + "subtitle": "هوش مصنوعی مکالمه برای همه." +} diff --git a/website/public/locales/fa/labelling.json b/website/public/locales/fa/labelling.json new file mode 100644 index 00000000..26f67447 --- /dev/null +++ b/website/public/locales/fa/labelling.json @@ -0,0 +1,24 @@ +{ + "fails_task.question": "آیا این یک پاسخ بد است، به عنوان پاسخ به کار سریع?", + "hate_speech": "سخنان تنفرآمیز", + "hate_speech.explanation": "محتوا توهین آمیز یا تهدیدآمیز است و بیانگر تعصب نسبت به یک ویژگی محافظت شده است. تعصب به دیدگاه های از پیش تعیین شده ای اشاره دارد که مبتنی بر عقل نیست. 
ویژگی های محافظت شده شامل جنسیت، قومیت، مذهب، گرایش جنسی و ویژگی های مشابه است.", + "label_highlighted_flag_instruction": "هر کدام را که برای پیام برجسته شده اعمال می شود انتخاب کنید:", + "label_highlighted_likert_instruction": "به پیام هایلایت شده امتیاز دهید:", + "label_highlighted_yes_no_instruction": "به سوال(های) زیر در مورد پیام برجسته شده پاسخ دهید:", + "label_message_flag_instruction": "هر کدام را که برای پیام اعمال می شود انتخاب کنید:", + "label_message_likert_instruction": "به پیام امتیاز دهید:", + "label_message_yes_no_instruction": "به سوال(های) زیر در مورد پیام پاسخ دهید:", + "lang_mismatch": "زبان اشتباه", + "lang_mismatch.explanation": "به زبان انتخابی فعلی نوشته نشده است.", + "moral_judgement": "اخلاقی بودن", + "moral_judgement.explanation": "غیر اخلاقی است.", + "not_appropriate": "نامناسب", + "not_appropriate.explanation": "برای کاربر نامناسب است.", + "pii": "حاوی آشش", + "pii.explanation": "حاوی اطلاعات شناسایی شخصی است. به عنوان مثال می توان به اطلاعات تماس شخصی، مجوز و سایر شماره های هویتی و جزئیات بانکی اشاره کرد.", + "political_content": "سیاسی", + "political_content.explanation": "دیدگاه های سیاسی را بیان می کند.", + "sexual_content": "محتوای جنسی", + "sexual_content.explanation": "حاوی محتوای جنسی.", + "spam.question": "آیا پیام هرزنامه است?" 
+} diff --git a/website/public/locales/fa/leaderboard.json b/website/public/locales/fa/leaderboard.json new file mode 100644 index 00000000..f4c44ba5 --- /dev/null +++ b/website/public/locales/fa/leaderboard.json @@ -0,0 +1,33 @@ +{ + "daily": "روزانه", + "label": "برچسب ها", + "last_updated_at": "آخرین به روز رسانی در: {{val, datetime}}", + "leaderboard": "رده بندی", + "monthly": "ماهانه", + "next": "بعدی", + "overall": "کلی", + "previous": "قبلی", + "prompt": "درخواست", + "rank": "رتبه", + "reply": "پاسخ", + "score": "امتیاز", + "top_5_contributors_today": "5 مشارکت کننده برتر امروز", + "user": "کاربر", + "view_all": "نمایش همه", + "weekly": "هفتگی", + "accepted": "↪ پذیرفته شده", + "accepted_prompts": "درخواست های پذیرفته شده", + "day": "روز", + "good_rankings": "رتبه های خوب", + "labels_full": "برچسب ها (کامل)", + "labels_simple": "برچسب ها (ساده)", + "month": "ماه", + "rankings": "رتبه بندی", + "replies_assistant": "به عنوان دستیار پاسخ می دهید", + "replies_prompter": "به عنوان درخواست کننده پاسخ می دهید", + "reply_ranked_1": "پاسخ ها در رتبه اول قرار گرفتند", + "total": "کل", + "week": "هفته", + "your_account": "حساب شما", + "your_stats": "آمار شما" +} diff --git a/website/public/locales/fa/message.json b/website/public/locales/fa/message.json new file mode 100644 index 00000000..fbda3535 --- /dev/null +++ b/website/public/locales/fa/message.json @@ -0,0 +1,21 @@ +{ + "copy_message_id": "کپی کردن شناسه پیام", + "copy_message_link": "کپی کردن لینک پیام", + "label_action": "برچسب", + "label_title": "برچسب", + "message_deleted": "پیام پاک شد", + "message": "پیام", + "open_new_tab_action": "بازکردن در صفحه جدید", + "parent": "منبع", + "reactions": "واکنش ها", + "recent_messages": "پیام های اخیر", + "report_action": "گزارش", + "report_placeholder": "چرا باید این پیام بررسی شود?", + "report_title": "گزارش", + "send_report": "ارسال", + "stop_tree": "درخت توقف", + "submit_labels": "ارسال", + "tree_stopped": "درخت متوقف شده در {{id}}", + "view_user": "مشاهده کاربر", + 
"your_recent_messages": "پیام های اخیر شما" +} diff --git a/website/public/locales/fa/tasks.json b/website/public/locales/fa/tasks.json new file mode 100644 index 00000000..629b2163 --- /dev/null +++ b/website/public/locales/fa/tasks.json @@ -0,0 +1,84 @@ +{ + "default": { + "unchanged_title": "بدون تغییرات", + "unchanged_message": "ادامه میدهید?" + }, + "random": { + "label": "احساس خوش شانسی می کنم", + "desc": "با شروع یک کار تصادفی به ما کمک کنید تا دستیار باز را بهبود ببخشیم." + }, + "create_initial_prompt": { + "label": "درخواست های اولیه را ایجاد کنید", + "desc": "درخواست‌های اولیه را بنویسید تا به دستیار باز کمک کنید تا بتواند به پیام‌های مختلف پاسخ دهد. (شرکت در قرعه کشی)", + "overview": "یک پیام اولیه برای ارسال به دستیار ایجاد کنید", + "instruction": "درخواست های اولیه را ارائه دهید", + "response_placeholder": "درخواست خود را اینجا بنویسید..." + }, + "reply_as_user": { + "label": "به عنوان کاربر پاسخ دهید", + "desc": "با دستیار باز گپ بزنید و در هنگام تعامل با آن به بهبود پاسخ‌های آن کمک کنید.", + "overview": "با توجه به گفتگوی زیر، پاسخ کافی را ارائه دهید", + "instruction": "پاسخ کاربر را ارائه دهید", + "response_placeholder": "پاسخ خود را اینجا بنویسید..." + }, + "reply_as_assistant": { + "label": "به عنوان دستیار پاسخ دهید", + "desc": "به دستیار باز کمک کنید تا پاسخ‌های خود را با سایر کاربران را بهبود بخشد.", + "overview": "با توجه به گفتگوی زیر، پاسخ کافی را ارائه دهید", + "response_placeholder": "پاسخ خود را اینجا بنویسید..." + }, + "rank_user_replies": { + "label": "رتبه‌بندی پاسخ های کاربر", + "desc": "به دستیار باز کمک کنید تا پاسخ‌های خود را با سایر کاربران را بهبود بخشد.", + "overview": "با توجه به پاسخ‌های کاربر زیر، آنها را از بهترین به بدترین، بهترین اول بودن، بدترین آخرین بودن مرتب کنید.", + "unchanged_title": "بدون تغییر", + "unchanged_message": "شما ترتیب درخواست ها را تغییر نداده اید. به ادامه روند مطمئن هستید?" 
+ }, + "rank_assistant_replies": { + "label": "رتبه‌بندی پاسخ های دستیار", + "desc": "اعلان‌های امتیاز توسط دستیار باز بر اساس دقت و خوانایی آن‌ها ارائه می‌شود.", + "overview": "با توجه به پاسخ‌های دستیار زیر، آنها را از بهترین به بدترین، بهترین بودن اولین، بدترین بودن آخرین مرتبه‌سازی کنید.", + "unchanged_title": "بدون تغییر", + "unchanged_message": "شما ترتیب درخواست ها را تغییر نداده اید. به ادامه روند مطمئن هستید?" + }, + "rank_initial_prompts": { + "label": "رتبه‌بندی درخواست های اولیه", + "desc": "اعلان‌های امتیاز توسط دستیار باز بر اساس دقت و خوانایی آن‌ها ارائه می‌شود.", + "overview": "با توجه به دستورات اولیه زیر، آنها را از بهترین به بدترین، بهترین بودن اولین، بدترین بودن آخرین مرتبه سازی کنید..", + "unchanged_title": "بدون تغییر", + "unchanged_message": "شما ترتیب درخواست ها را تغییر نداده اید. به ادامه روند مطمئن هستید?" + }, + "label_initial_prompt": { + "label": "اعلان اولیه را برچسب بزنید", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "برای اعلان زیر برچسب ارائه کنید" + }, + "label_prompter_reply": { + "label": "برچسب پاسخ سریع", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "با توجه به بحث زیر، برچسب هایی را برای درخواست نهایی ارائه دهید." + }, + "label_assistant_reply": { + "label": "پاسخ دستیار برچسب", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "با توجه به بحث زیر، برچسب هایی را برای درخواست نهایی ارائه دهید." + }, + "classify_initial_prompt": { + "label": "طبقه بندی درخواست اولیه", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "دستور زیر را بخوانید و سپس به سوال در مورد آن پاسخ دهید." + }, + "classify_prompter_reply": { + "label": "طبقه بندی پاسخ درخواست کننده", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "گفتگوی زیر را بخوانید و سپس به سوال در مورد آخرین پاسخ در بحث پاسخ دهید." 
+ }, + "classify_assistant_reply": { + "label": "طبقه بندی پاسخ دستیار", + "desc": "برچسب هایی را برای یک درخواست ارائه دهید.", + "overview": "گفتگوی زیر را بخوانید و سپس به سوال در مورد آخرین پاسخ در بحث پاسخ دهید." + }, + "available_task_count": "{{count}} وظیفه موجود است", + "writing_wrong_langauge_a_b": "بنظر میرسد شما با زبان {{detected_lang}} کار می کنید ولی با زبان {{submit_lang}} ارسال میشود.", + "submitted_as": "با زبان {{submit_lang}} ارسال شد." +} diff --git a/website/public/locales/fa/tos.json b/website/public/locales/fa/tos.json new file mode 100644 index 00000000..c935a9d5 --- /dev/null +++ b/website/public/locales/fa/tos.json @@ -0,0 +1,6 @@ +{ + "title": "شرایط خدمات برای دستیار باز", + "content": "برای ادامه استفاده از دستیار باز ابتدا باید شرایط خدمات ما را بپذیرید.", + "accept": "قبول", + "decline": "رد" +} diff --git a/website/public/locales/fr/common.json b/website/public/locales/fr/common.json index 66c882c9..d05a63b9 100644 --- a/website/public/locales/fr/common.json +++ b/website/public/locales/fr/common.json @@ -6,22 +6,31 @@ "conversational": "IA conversationnelle pour tout le monde.", "copied": "Copied", "dark_mode": "Mode sombre", + "dashboard_home": "Accueil du tableau de bord", "dashboard": "Tableau de bord", "delete": "Supprimer", "discord": "Discord", "docs": "Docs", "github": "GitHub", + "leaderboard": "Classement", "legal": "Légal", "light_mode": "Mode clair", "loading": "Chargement en cours...", + "messages_dashboard": "Tableau de bord des messages", + "messages": "Messages", "more_information": "Plus d'informations", "no": "Non", "privacy_policy": "Politique de confidentialité", "report_a_bug": "Signaler un bug", "sign_in": "Se connecter", "sign_out": "Se déconnecter", + "status_dashboard": "Tableau de bord de statut", + "status": "Statut", "success": "Success", "terms_of_service": "Conditions d'utilisation", "title": "Open Assistant", + "user_leaderboard": "Classement des utilisateurs", + "users_dashboard": "Tableau de bord des 
utilisateurs", + "users": "Utilisateurs", "yes": "Oui" } diff --git a/website/public/locales/fr/side_menu.json b/website/public/locales/fr/side_menu.json deleted file mode 100644 index 87f62a5e..00000000 --- a/website/public/locales/fr/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Tableau de bord", - "dashboard_home": "Accueil du tableau de bord", - "leaderboard": "Classement", - "messages": "Messages", - "messages_dashboard": "Tableau de bord des messages", - "status": "Statut", - "status_dashboard": "Tableau de bord de statut", - "user_leaderboard": "Classement des utilisateurs", - "users": "Utilisateurs", - "users_dashboard": "Tableau de bord des utilisateurs" -} diff --git a/website/public/locales/hu/common.json b/website/public/locales/hu/common.json index f0cb71bb..9baa036e 100644 --- a/website/public/locales/hu/common.json +++ b/website/public/locales/hu/common.json @@ -6,22 +6,31 @@ "conversational": "Társalgási MI Mindenkinek.", "copied": "Copied", "dark_mode": "Dark Mode", + "dashboard_home": "Dashboard Home", "dashboard": "Irányítópult", "delete": "Delete", "discord": "Discord", "docs": "Leírás", "github": "GitHub", + "leaderboard": "Leaderboard", "legal": "Jogi", "light_mode": "Light Mode", "loading": "Betöltés...", + "messages_dashboard": "Messages Dashboard", + "messages": "Messages", "more_information": "További információ", "no": "Nem", "privacy_policy": "Adatvédelem", "report_a_bug": "Hibabejelentés", "sign_in": "Bejelentkezés", "sign_out": "Kijelentkezés", + "status_dashboard": "Status Dashboard", + "status": "Status", "success": "Success", "terms_of_service": "Felhasználási feltételek", "title": "Open Assistant", + "user_leaderboard": "User Leaderboard", + "users_dashboard": "Users Dashboard", + "users": "Users", "yes": "Igen" } diff --git a/website/public/locales/hu/side_menu.json b/website/public/locales/hu/side_menu.json deleted file mode 100644 index 293112f4..00000000 --- a/website/public/locales/hu/side_menu.json +++ 
/dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Dashboard", - "dashboard_home": "Dashboard Home", - "leaderboard": "Leaderboard", - "messages": "Messages", - "messages_dashboard": "Messages Dashboard", - "status": "Status", - "status_dashboard": "Status Dashboard", - "user_leaderboard": "User Leaderboard", - "users": "Users", - "users_dashboard": "Users Dashboard" -} diff --git a/website/public/locales/id/common.json b/website/public/locales/id/common.json new file mode 100644 index 00000000..c1dee131 --- /dev/null +++ b/website/public/locales/id/common.json @@ -0,0 +1,38 @@ +{ + "about": "Tentang", + "account_settings": "Akun", + "admin_dashboard": "Dasbor Admin", + "connect": "Terhubung", + "conversational": "AI Percakapan untuk semuanya", + "copied": "Disalin", + "dark_mode": "Mode Gelap", + "dashboard_home": "Dasbor Awal", + "dashboard": "Dasbor", + "delete": "Hapus", + "discord": "Discord", + "docs": "Docs", + "github": "GitHub", + "leaderboard": "Papan Peringkat", + "legal": "Legal", + "light_mode": "Mode Terang", + "loading": "Memuat...", + "messages_dashboard": "Dasbor Pesan", + "messages": "Pesan", + "more_information": "Informasi lebih lanjut", + "no": "Tidak", + "parameters": "Parameter", + "privacy_policy": "Kebijakan Privasi", + "report_a_bug": "Laporkan Bug", + "sign_in": "Masuk", + "sign_out": "Keluar", + "status": "Status", + "status_dashboard": "Dasbor Status", + "success": "Sukses", + "terms_of_service": "Syarat Layanan", + "title": "Open Assistant", + "trollboard": "Trollboard", + "user_leaderboard": "Papan Peringkat Pengguna", + "users_dashboard": "Dasbor Pengguna", + "users": "Pengguna", + "yes": "Ya" +} diff --git a/website/public/locales/id/dashboard.json b/website/public/locales/id/dashboard.json new file mode 100644 index 00000000..67da3e30 --- /dev/null +++ b/website/public/locales/id/dashboard.json @@ -0,0 +1,8 @@ +{ + "grab_a_task": "Ambil tugas!", + "create": "Buat", + "evaluate": "Evaluasi", + "label": "Label", + "dashboard": "Dasbor", + 
"go": "Mulai" +} diff --git a/website/public/locales/id/index.json b/website/public/locales/id/index.json new file mode 100644 index 00000000..25d3440b --- /dev/null +++ b/website/public/locales/id/index.json @@ -0,0 +1,23 @@ +{ + "blurb": "Kami percaya kita dapat memulai revolusi.", + "blurb1": "Dengan cara yang sama Stable Diffusion membantu dunia membuat seni dan gambar dengan cara baru, kami ingin meningkatkan dunia dengan menyediakan AI percakapan yang luar biasa.", + "description": "AI Percakapan untuk semua orang. Proyek sumber terbuka untuk membuat model bahasa yang didukung chat oleh LAION dan kontributor di seluruh dunia.", + "faq_items": { + "q0": "Sampai tahap seperti apa proyek ini?", + "a0": "Kami masih berada pada tahap awal pengembangan, bekerja dengan melakukan penelitian yang sudah ada tentang penerapan RLHF pada model bahasa besar.", + "q1": "Siapa yang berada di belakang Open Assistant?", + "a1": "Open Assistant adalah sebuah proyek yang didirikan oleh LAION dan individu di seluruh dunia yang tertarik untuk membawa teknologi ini kepada semua orang.", + "q2": "Lisensi apa yang digunakan Open Assistant?", + "a2": "Kode dan model dilisensikan di bawah lisensi Apache 2.0.", + "q3": "Apakah data pelatihan juga akan dirilis?", + "a3": "Ya, di bawah CC BY 4.0.", + "q4": "Apakah Open Assistant gratis?", + "a4": "Ya, Open Assistant akan gratis digunakan dan dimodifikasi.", + "q5": "Perangkat keras apa yang dibutuhkan untuk menjalankan model?", + "a5": "Akan ada versi yang dapat dijalankan pada perangkat konsumen." + }, + "faq_title": "Pertanyaan yang Sering Ditanyakan", + "join_us_description": "Semua proyek sumber terbuka dimulai dari orang-orang seperti Anda. Sumber terbuka adalah keyakinan bahwa jika kita bekerja sama, kita bisa bersama-sama memberikan pengetahuan dan teknologi kita kepada dunia demi kebaikan umat manusia. Mau bergabung? 
Temukan kami di sini:", + "join_us_title": "Bergabunglah bersama kami", + "subtitle": "AI Percakapan untuk semua orang." +} diff --git a/website/public/locales/id/labelling.json b/website/public/locales/id/labelling.json new file mode 100644 index 00000000..785901d4 --- /dev/null +++ b/website/public/locales/id/labelling.json @@ -0,0 +1,24 @@ +{ + "fails_task.question": "Apakah ini jawaban yang buruk untuk menjawab prompt di atas?", + "hate_speech": "Ujaran Kebencian", + "hate_speech.explanation": "Berisi konten yang merusak, mengancam, dan mengekspresikan prasangka buruk atas perihal yang dilindungi hukum. Prasangka dikatakan buruk apabila tidak didukung alasan yang tepat. Perihal yang dilindungi hukum adalah seperti jenis kelamin, etnisitas, agama, orientasi seksual, dan sebagainya.", + "label_highlighted_flag_instruction": "Pilih yang sesuai dengan pesan yang sedang disorot:", + "label_highlighted_likert_instruction": "Nilai pesan yang disorot:", + "label_highlighted_yes_no_instruction": "Jawab pertanyaan berikut terkait dengan pesan yang disorot:", + "label_message_flag_instruction": "Pilih yang sesuai dengan pesan:", + "label_message_likert_instruction": "Nilai pesan:", + "label_message_yes_no_instruction": "Jawab pertanyaan berikut yang terkait dengan pesan:", + "lang_mismatch": "Salah Bahasa", + "lang_mismatch.explanation": "Tidak tertulis sesuai dengan bahasa yang dipilih.", + "moral_judgement": "Moral Penghakiman", + "moral_judgement.explanation": "Berisi penghakiman secara moral.", + "not_appropriate": "Tidak Layak", + "not_appropriate.explanation": "Tidak layak untuk asisten percakapan.", + "pii": "Berisi Data Pribadi", + "pii.explanation": "Berisi informasi yang mungkin bisa digunakan untuk mengidentifikasi seseorang. 
Seperti, nomor telepon, alamat, nomor induk, dan informasi perbankan.", + "political_content": "Politik", + "political_content.explanation": "Berisi konten politik.", + "sexual_content": "Konten Seksual", + "sexual_content.explanation": "Berisi konten seksual.", + "spam.question": "Apakah ini spam?" +} diff --git a/website/public/locales/id/leaderboard.json b/website/public/locales/id/leaderboard.json new file mode 100644 index 00000000..5468fb4e --- /dev/null +++ b/website/public/locales/id/leaderboard.json @@ -0,0 +1,33 @@ +{ + "accepted": "↪ Diterima", + "accepted_prompts": "Perintah yang diterima", + "daily": "Harian", + "day": "Hari", + "good_rankings": "Peringkat yang baik", + "label": "Label", + "labels_full": "Label (lengkap)", + "labels_simple": "Label (sederhana)", + "last_updated_at": "Terakhir diperbarui: {{val, datetime}}", + "leaderboard": "Papan peringkat", + "month": "Bulan", + "monthly": "Bulanan", + "next": "Selanjutnya", + "overall": "Garis Besar", + "previous": "Sebelumnya", + "prompt": "Perintah", + "rank": "Peringkat", + "rankings": "Peringkat", + "replies_assistant": "Balasan sebagai Asisten", + "replies_prompter": "Balasan sebagai Prompter", + "reply": "Balasan", + "reply_ranked_1": "Balasan yang diberi peringkat 1", + "score": "Skor", + "top_5_contributors_today": "5 Kontributor Teratas Hari Ini", + "total": "Total", + "user": "Pengguna", + "view_all": "Lihat semua", + "week": "Minggu", + "weekly": "Mingguan", + "your_account": "Akun Anda", + "your_stats": "Statistik Anda" +} diff --git a/website/public/locales/id/message.json b/website/public/locales/id/message.json new file mode 100644 index 00000000..f2727076 --- /dev/null +++ b/website/public/locales/id/message.json @@ -0,0 +1,20 @@ +{ + "copy_message_id": "Salin ID Pesan", + "label_action": "Label", + "label_title": "Label", + "message_deleted": "Pesan dihapus", + "message": "Pesan", + "open_new_tab_action": "Buka di tab baru", + "parent": "Induk", + "reactions": "Reaksi", + 
"recent_messages": "Pesan Terbaru", + "report_action": "Laporkan", + "report_placeholder": "Tuliskan alasan Anda", + "report_title": "Laporkan", + "send_report": "Kirim laporan", + "stop_tree": "Hentikan struktur pohon", + "submit_labels": "Simpan label", + "tree_stopped": "Struktur pohon dihentikan {{id}}", + "view_user": "Lihat pengguna", + "your_recent_messages": "Pesan terbaru Anda" +} diff --git a/website/public/locales/id/tasks.json b/website/public/locales/id/tasks.json new file mode 100644 index 00000000..a2449067 --- /dev/null +++ b/website/public/locales/id/tasks.json @@ -0,0 +1,82 @@ +{ + "default": { + "unchanged_title": "Tidak ada perubahan", + "unchanged_message": "Apakah Anda yakin ingin melanjutkan?" + }, + "random": { + "label": "Saya merasa beruntung", + "desc": "Bantu kami memperbaiki Open Assistant dengan memilih tugas secara acak." + }, + "create_initial_prompt": { + "label": "Buat Prompt Awal", + "desc": "Tulis prompt awal untuk membantu Open Assistant mencoba membalas pesan yang beragam. (masuk ke undian)", + "overview": "Buat pesan awal untuk dikirimkan ke asisten", + "instruction": "Berikan prompt awal", + "response_placeholder": "Tulis prompt Anda di sini..." + }, + "reply_as_user": { + "label": "Balas sebagai Pengguna", + "desc": "Berbicara dengan Open Assistant dan bantu meningkatkan respons dengan berinteraksi dengannya.", + "overview": "Berdasarkan percakapan berikut, berikan balasan yang sesuai", + "instruction": "Berikan balasan pengguna", + "response_placeholder": "Tulis balasan Anda di sini..." + }, + "reply_as_assistant": { + "label": "Balas sebagai Asisten", + "desc": "Bantu Open Assistant memperbaiki respons terhadap percakapan dengan pengguna lain.", + "overview": "Berdasarkan percakapan berikut, berikan balasan yang sesuai", + "response_placeholder": "Tulis balasan Anda di sini..." 
+ }, + "rank_user_replies": { + "label": "Urutkan Balasan Pengguna", + "desc": "Bantu Open Assistant memperbaiki respons terhadap percakapan dengan pengguna lain.", + "overview": "Berdasarkan Balasan Pengguna berikut, urutkan dari yang terbaik ke yang terburuk, yang terbaik pertama dan yang terburuk terakhir.", + "unchanged_title": "Urutan Tidak Berubah", + "unchanged_message": "Anda belum mengubah urutan prompt. Apakah Anda yakin ingin melanjutkan?" + }, + "rank_assistant_replies": { + "label": "Peringkat Balasan Asisten", + "desc": "Penilaian pernyataan oleh Open Assistant berdasarkan akurasi dan kemudahan dibaca.", + "overview": "Berdasarkan balasan Asisten berikut, urutkan dari terbaik ke terburuk, terbaik ada di pertama, terburuk ada di terakhir.", + "unchanged_title": "Urutan Tak Berubah", + "unchanged_message": "Anda belum mengubah urutan pernyataan. Apakah Anda yakin ingin melanjutkan?" + }, + "rank_initial_prompts": { + "label": "Peringkat Pernyataan Awal", + "desc": "Penilaian pernyataan oleh Open Assistant berdasarkan akurasi dan kemudahan dibaca.", + "overview": "Berdasarkan pernyataan awal berikut, urutkan dari terbaik ke terburuk, terbaik ada di pertama, terburuk ada di terakhir.", + "unchanged_title": "Urutan Tak Berubah", + "unchanged_message": "Anda belum mengubah urutan pernyataan. Apakah Anda yakin ingin melanjutkan?" 
+ }, + "label_initial_prompt": { + "label": "Label Pernyataan Awal", + "desc": "Berikan label untuk pernyataan.", + "overview": "Berikan label untuk pernyataan berikut" + }, + "label_prompter_reply": { + "label": "Label Balasan Prompter", + "desc": "Berikan label untuk pernyataan.", + "overview": "Berdasarkan diskusi berikut, berikan label untuk pernyataan akhir." + }, + "label_assistant_reply": { + "label": "Label Balasan Asisten", + "desc": "Berikan label untuk pernyataan.", + "overview": "Berdasarkan diskusi berikut, berikan label untuk pernyataan akhir." + }, + "classify_initial_prompt": { + "label": "Klasifikasi Pernyataan Awal", + "desc": "Berikan label untuk pernyataan.", + "overview": "Baca pernyataan berikut dan kemudian jawab pertanyaan tentang itu." + }, + "classify_prompter_reply": { + "label": "Klasifikasi Balasan Prompter", + "desc": "Berikan label untuk pernyataan.", + "overview": "Baca percakapan berikut dan kemudian jawab pertanyaan tentang balasan terakhir dalam diskusi." + }, + "classify_assistant_reply": { + "label": "Klasifikasi Balasan Asisten", + "desc": "Berikan label untuk pernyataan.", + "overview": "Baca percakapan berikut dan kemudian jawab pertanyaan tentang balasan terakhir dalam diskusi." + }, + "available_task_count": "{{count}} tugas tersedia" +} diff --git a/website/public/locales/id/tos.json b/website/public/locales/id/tos.json new file mode 100644 index 00000000..dac71a29 --- /dev/null +++ b/website/public/locales/id/tos.json @@ -0,0 +1,6 @@ +{ + "title": "Syarat dan Ketentuan Penggunaan Open Assistant", + "content": "Untuk melanjutkan menggunakan Open Assistant, anda harus menerima Syarat dan Ketentuan Penggunaan terlebih dahulu.", + "accept": "Terima", + "decline": "Tolak" +} diff --git a/website/public/locales/it/common.json b/website/public/locales/it/common.json index ad6b45be..32204ed2 100644 --- a/website/public/locales/it/common.json +++ b/website/public/locales/it/common.json @@ -6,22 +6,31 @@ "conversational": "I.A. 
di conversazione per tutti.", "copied": "Copiato", "dark_mode": "Modalità scura", + "dashboard_home": "Schermata iniziale", "dashboard": "Pannello di controllo", "delete": "Cancella", "discord": "Discord", "docs": "Documenti", "github": "GitHub", + "leaderboard": "Classifica", "legal": "Legale", "light_mode": "Modalità chiara", "loading": "Caricando...", + "messages_dashboard": "Pannello di controllo dei messaggi", + "messages": "Messaggi", "more_information": "Maggiori Informazioni", "no": "No", "privacy_policy": "Politica sulla Privacy", "report_a_bug": "Segnala un problema", "sign_in": "Sign In", "sign_out": "Esci", + "status_dashboard": "Pannello di controllo dello stato", + "status": "Stato", "success": "Successo", "terms_of_service": "Termini di Servizio", "title": "Open Assistant", + "user_leaderboard": "Classifica dell'utente", + "users_dashboard": "Pannello di controllo degli utenti", + "users": "Utenti", "yes": "Si" } diff --git a/website/public/locales/it/side_menu.json b/website/public/locales/it/side_menu.json deleted file mode 100644 index 9bb8b7fb..00000000 --- a/website/public/locales/it/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Pannello di controllo", - "dashboard_home": "Schermata iniziale", - "leaderboard": "Classifica", - "messages": "Messaggi", - "messages_dashboard": "Pannello di controllo dei messaggi", - "status": "Stato", - "status_dashboard": "Pannello di controllo dello stato", - "user_leaderboard": "Classifica dell'utente", - "users": "Utenti", - "users_dashboard": "Pannello di controllo degli utenti" -} diff --git a/website/public/locales/ko/common.json b/website/public/locales/ko/common.json index b7029019..491f8dbd 100644 --- a/website/public/locales/ko/common.json +++ b/website/public/locales/ko/common.json @@ -6,22 +6,31 @@ "conversational": "모두를 위한 대화형 AI.", "copied": "Copied", "dark_mode": "다크 모드", + "dashboard_home": "대시보드 홈", "dashboard": "대시보드", "delete": "삭제", "discord": "Discord", "docs": "문서", 
"github": "GitHub", + "leaderboard": "리더보드", "legal": "Legal", "light_mode": "라이트 모드", "loading": "로딩중...", + "messages_dashboard": "메세지 대시보드", + "messages": "메세지", "more_information": "추가 정보", "no": "아니오", "privacy_policy": "개인정보보호 정책", "report_a_bug": "버그신고", "sign_in": "Sign In", "sign_out": "Sign Out", + "status_dashboard": "상태 대시보드", + "status": "상태", "success": "Success", "terms_of_service": "서비스 약관", "title": "오픈 어시스턴트", + "user_leaderboard": "사용자 리더보드", + "users_dashboard": "사용자 대시보드", + "users": "사용자", "yes": "예" } diff --git a/website/public/locales/ko/side_menu.json b/website/public/locales/ko/side_menu.json deleted file mode 100644 index 99b43380..00000000 --- a/website/public/locales/ko/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "대시보드", - "dashboard_home": "대시보드 홈", - "leaderboard": "리더보드", - "messages": "메세지", - "messages_dashboard": "메세지 대시보드", - "status": "상태", - "status_dashboard": "상태 대시보드", - "user_leaderboard": "사용자 리더보드", - "users": "사용자", - "users_dashboard": "사용자 대시보드" -} diff --git a/website/public/locales/pl/common.json b/website/public/locales/pl/common.json index 01bca692..cb2db217 100644 --- a/website/public/locales/pl/common.json +++ b/website/public/locales/pl/common.json @@ -6,22 +6,31 @@ "conversational": "Konwersacyjna SI dla każdego.", "copied": "Skopiowano", "dark_mode": "Tryb ciemny", + "dashboard_home": "Strona Główna Panelu", "dashboard": "Panel", "delete": "Usuń", "discord": "Discord", "docs": "Dokumentacja", "github": "GitHub", + "leaderboard": "Ranking", "legal": "Prawa", "light_mode": "Tryb jasny", "loading": "Wczytywanie...", + "messages_dashboard": "Strona Główna Wiadomości", + "messages": "Wiadomości", "more_information": "Więcej Informacji", "no": "Nie", "privacy_policy": "Polityka Prywatności", "report_a_bug": "Zgłoś Błąd", "sign_in": "Zaloguj Się", "sign_out": "Wyloguj Się", + "status_dashboard": "Panel Statusu", + "status": "Status", "success": "Sukces", "terms_of_service": "Warunki Usługi", 
"title": "Open Assistant", + "user_leaderboard": "Ranking Użytkowników", + "users_dashboard": "Panel Użytkownika", + "users": "Użytkownicy", "yes": "Tak" } diff --git a/website/public/locales/pl/side_menu.json b/website/public/locales/pl/side_menu.json deleted file mode 100644 index f352bd9f..00000000 --- a/website/public/locales/pl/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Panel", - "dashboard_home": "Strona Główna Panelu", - "messages": "Wiadomości", - "messages_dashboard": "Strona Główna Wiadomości", - "leaderboard": "Ranking", - "user_leaderboard": "Ranking Użytkowników", - "users": "Użytkownicy", - "users_dashboard": "Panel Użytkownika", - "status": "Status", - "status_dashboard": "Panel Statusu" -} diff --git a/website/public/locales/pt-BR/common.json b/website/public/locales/pt-BR/common.json index 0e33ed21..cfa5a25f 100644 --- a/website/public/locales/pt-BR/common.json +++ b/website/public/locales/pt-BR/common.json @@ -6,22 +6,31 @@ "conversational": "IA conversacional para todos.", "copied": "Copiado", "dark_mode": "Tema Escuro", + "dashboard_home": "Página principal", "dashboard": "Painel", "delete": "Deletar", "discord": "Discord", "docs": "Documentação", "github": "GitHub", + "leaderboard": "Leaderboard", "legal": "Legal", "light_mode": "Tema Claro", "loading": "Carregando...", + "messages_dashboard": "Painel de mensagens", + "messages": "Mensagens", "more_information": "Mais informações...", "no": "Não", "privacy_policy": "Política de Privacidade", "report_a_bug": "Reportar um erro", "sign_in": "Entrar", "sign_out": "Sair", + "status_dashboard": "Painel de status", + "status": "Status", "success": "Sucesso", "terms_of_service": "Termos de serviço", "title": "Open Assistant", + "user_leaderboard": "Leaderboard dos usuários", + "users_dashboard": "Painel de controle de usuários", + "users": "Usuários", "yes": "Sim" } diff --git a/website/public/locales/pt-BR/side_menu.json b/website/public/locales/pt-BR/side_menu.json deleted file 
mode 100644 index 78b01d57..00000000 --- a/website/public/locales/pt-BR/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Painel", - "dashboard_home": "Página principal", - "leaderboard": "Leaderboard", - "messages": "Mensagens", - "messages_dashboard": "Painel de mensagens", - "status": "Status", - "status_dashboard": "Painel de status", - "user_leaderboard": "Leaderboard dos usuários", - "users": "Usuários", - "users_dashboard": "Painel de controle de usuários" -} diff --git a/website/public/locales/ru/common.json b/website/public/locales/ru/common.json index 941deeb1..471bad6b 100644 --- a/website/public/locales/ru/common.json +++ b/website/public/locales/ru/common.json @@ -2,26 +2,36 @@ "about": "О нас", "account_settings": "Аккаунт", "admin_dashboard": "Панель администратора", - "connect": "Подключиться", + "connect": "Присоединиться к нам", "conversational": "Разговорный ИИ для каждого.", "copied": "Скопировано", "dark_mode": "Темная тема", + "dashboard_home": "Главная страница", "dashboard": "Главная", "delete": "Удалить", "discord": "Discord", "docs": "Документация", "github": "GitHub", - "legal": "Legal", + "leaderboard": "Рейтинг", + "legal": "Правовая информация", "light_mode": "Светлая тема", "loading": "Загрузка...", + "messages_dashboard": "Панель просмотра сообщений", + "messages": "Сообщения", "more_information": "Больше...", "no": "Нет", "privacy_policy": "Политика конфиденциальности", "report_a_bug": "Сообщить об ошибке", "sign_in": "Вход", "sign_out": "Выйти из аккаунта", + "status_dashboard": "Панель состояния системы", + "status": "Статус", "success": "Успешно", "terms_of_service": "Пользовательское Соглашение", "title": "Open Assistant", + "trollboard": "Доска позора", + "user_leaderboard": "Таблица лидеров", + "users_dashboard": "Панель управления пользователями", + "users": "Пользователи", "yes": "Да" } diff --git a/website/public/locales/ru/labelling.json b/website/public/locales/ru/labelling.json index a2257ff5..cc1073b7 
100644 --- a/website/public/locales/ru/labelling.json +++ b/website/public/locales/ru/labelling.json @@ -8,8 +8,8 @@ "label_message_flag_instruction": "Выберите все, что относится к сообщению:", "label_message_likert_instruction": "Оцените сообщение:", "label_message_yes_no_instruction": "Ответьте на следующий вопрос (вопросы) о сообщении:", - "lang_mismatch": "Неправильный язык", - "lang_mismatch.explanation": "Не написано на выбранном языке.", + "lang_mismatch": "Неправильный язык (не {lang})", + "lang_mismatch.explanation": "Не написано на выбранном языке — {lang}.", "moral_judgement": "Оценивает мораль", "moral_judgement.explanation": "Выражает субъективную моральную оценку.", "not_appropriate": "Неуместный ответ", diff --git a/website/public/locales/ru/leaderboard.json b/website/public/locales/ru/leaderboard.json index 77971485..6f7367fa 100644 --- a/website/public/locales/ru/leaderboard.json +++ b/website/public/locales/ru/leaderboard.json @@ -1,18 +1,33 @@ { + "accepted": "↪ Принято", + "accepted_prompts": "Принятые запросы", "daily": "За день", + "day": "День", + "good_rankings": "Хорошо оценённые (Good rankings)", "label": "Теги", + "labels_full": "Теги (полная классификация)", + "labels_simple": "Теги (упрощённая классификация)", "last_updated_at": "Последний раз обновлено: {{val, datetime}}", "leaderboard": "Таблица лидеров", + "month": "Месяц", "monthly": "За месяц", "next": "Вперед", "overall": "Всего", "previous": "Назад", "prompt": "Запросы", "rank": "Позиция", + "rankings": "Сортировка", + "replies_assistant": "Ответы в качестве Ассистента", + "replies_prompter": "Ответы в качестве Пользователя", "reply": "Ответы", + "reply_ranked_1": "Ваши ответы, оцененные как лучшие", "score": "Счет", "top_5_contributors_today": "Топ 5 Пользователей за Сегодня", + "total": "Всего", "user": "Пользователь", "view_all": "Посмотреть все", - "weekly": "За неделю" + "week": "Неделя", + "weekly": "За неделю", + "your_account": "Аккаунт", + "your_stats": "Статистика" } 
diff --git a/website/public/locales/ru/message.json b/website/public/locales/ru/message.json index d262b67c..cbb3e8a2 100644 --- a/website/public/locales/ru/message.json +++ b/website/public/locales/ru/message.json @@ -1,5 +1,6 @@ { - "copy_message_id": "Скопировать message ID", + "copy_message_id": "Скопировать \"message ID\"", + "copy_message_link": "Скопировать ссылку на сообщение", "label_action": "Классифицировать", "label_title": "Классифицировать", "message": "Сообщение", @@ -7,14 +8,14 @@ "open_new_tab_action": "Открыть в новой вкладке", "parent": "Родитель", "reactions": "Реакции", - "recent_messages": "Последние Сообщения", + "recent_messages": "Последние Сообщения ({{language}})", "report_action": "Пожаловаться", "report_placeholder": "Почему это сообщение должно быть рассмотрено?", "report_title": "Пожаловаться", "send_report": "Отправить", - "stop_tree": "Stop tree", + "stop_tree": "Завершить граф", "submit_labels": "Отправить", - "tree_stopped": "Tree stopped {{id}}", + "tree_stopped": "Граф завершён {{id}}", "view_user": "О пользователе", "your_recent_messages": "Ваши Последние сообщения" } diff --git a/website/public/locales/ru/side_menu.json b/website/public/locales/ru/side_menu.json deleted file mode 100644 index e72ef732..00000000 --- a/website/public/locales/ru/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Главная", - "dashboard_home": "Главная страница", - "leaderboard": "Рейтинг", - "messages": "Сообщения", - "messages_dashboard": "Панель просмотра сообщений", - "status": "Статус", - "status_dashboard": "Панель состояния системы", - "user_leaderboard": "Таблица лидеров", - "users": "Пользователи", - "users_dashboard": "Панель управления пользователями" -} diff --git a/website/public/locales/ru/tasks.json b/website/public/locales/ru/tasks.json index 8968d1f9..3b6e747b 100644 --- a/website/public/locales/ru/tasks.json +++ b/website/public/locales/ru/tasks.json @@ -1,19 +1,11 @@ { - "available_task_count": "Доступных заданий: 
{{count}}", - "classify_assistant_reply": { - "label": "Оцените ответ Ассистента", - "desc": "Расставьте метки для запроса.", - "overview": "Прочитайте следующую переписку, а затем ответьте на вопросы о последнем сообщении." + "default": { + "unchanged_title": "Без изменений", + "unchanged_message": "Вы уверены, что хотите продолжить?" }, - "classify_initial_prompt": { - "label": "Оцените исходный запрос", - "desc": "Расставьте метки для запроса.", - "overview": "Прочитайте запрос, а затем ответьте на вопросы о нем." - }, - "classify_prompter_reply": { - "label": "Оцените ответ пользователя", - "desc": "Предоставьте метки для ответа.", - "overview": "Прочитайте следующую переписку, а затем ответьте на вопросы о последнем сообщении." + "random": { + "label": "Мне повезет", + "desc": "Помогите нам улучшить Open Assistant, начав выполнять случайное задание." }, "create_initial_prompt": { "label": "Создать изначальный запрос", @@ -22,28 +14,25 @@ "instruction": "Предоставьте изначальный запрос", "response_placeholder": "Напишите свой запрос здесь..." }, - "default": { - "unchanged_title": "Без изменений", - "unchanged_message": "Вы уверены, что хотите продолжить?" + "reply_as_user": { + "label": "Ответить как пользователь", + "desc": "Общайтесь с Open Assistant и помогайте улучшать его ответы в процессе взаимодействия с ним.", + "overview": "Прочитайте диалог, а затем ответьте от лица пользователя, пожалуйста, постарайтесь отвечать адекватно.", + "instruction": "Напишите от лица пользователя", + "response_placeholder": "Напишите свой ответ..." }, - "label_assistant_reply": { - "label": "Оцените ответ Ассистента", - "desc": "Выберите подходящие метки для ответа Ассистента.", - "overview": "Прочтите диалог, а затем выберите подходящие метки для последнего ответа." 
+ "reply_as_assistant": { + "label": "Ответить как Ассистент", + "desc": "Помогите Open Assistant улучшить свои ответы, отвечая от лица Ассистента.", + "overview": "Прочитайте диалог, а затем ответьте от лица Ассистента, пожалуйста, постарайтесь отвечать адекватно.", + "response_placeholder": "Напишите свой ответ..." }, - "label_initial_prompt": { - "label": "Оцените изначальный запрос", - "desc": "Выберите подходящие метки для запроса.", - "overview": "Выберите подходящие метки для следующего запроса." - }, - "label_prompter_reply": { - "label": "Оцените ответ пользователя", - "desc": "Выберите подходящие метки для ответа.", - "overview": "Прочтите диалог, а затем выберите подходящие метки для последнего ответа." - }, - "random": { - "label": "Мне повезет", - "desc": "Помогите нам улучшить Open Assistant, начав выполнять случайное задание." + "rank_user_replies": { + "label": "Оценить ответы пользователей", + "desc": "Помогите Open Assistant, оценив ответы пользователей.", + "overview": "Учитывая приведённые ответы пользователей, отсортируйте их от лучшего к худшему, лучший — первый, худший — последний.", + "unchanged_title": "Порядок не был изменён", + "unchanged_message": "Вы не изменили порядок ответов. Вы уверены, что хотите продолжить?" }, "rank_assistant_replies": { "label": "Оценить ответы Ассистента", @@ -59,24 +48,37 @@ "unchanged_title": "Порядок не был изменён", "unchanged_message": "Вы не изменили порядок запросов. Вы уверены, что хотите продолжить?" }, - "rank_user_replies": { - "label": "Оценить ответы пользователей", - "desc": "Помогите Open Assistant, оценив ответы пользователей.", - "overview": "Учитывая приведённые ответы пользователей, отсортируйте их от лучшего к худшему, лучший — первый, худший — последний.", - "unchanged_title": "Порядок не был изменён", - "unchanged_message": "Вы не изменили порядок ответов. Вы уверены, что хотите продолжить?" 
+ "label_initial_prompt": { + "label": "Оценить изначальный запрос", + "desc": "Выберите подходящие метки для запроса.", + "overview": "Выберите подходящие метки для следующего запроса." }, - "reply_as_assistant": { - "label": "Ответить как ассистент", - "desc": "Помогите Open Assistant улучшить свои ответы, отвечая от лица Ассистента.", - "overview": "Прочитайте диалог, а затем ответьте от лица Ассистента, пожалуйста, постарайтесь отвечать адекватно.", - "response_placeholder": "Напишите свой ответ..." + "label_prompter_reply": { + "label": "Оценить ответ пользователя", + "desc": "Выберите подходящие метки для ответа.", + "overview": "Прочтите диалог, а затем выберите подходящие метки для последнего ответа." }, - "reply_as_user": { - "label": "Ответить как пользователь", - "desc": "Общайтесь с Open Assistant и помогайте улучшать его ответы в процессе взаимодействия с ним.", - "overview": "Прочитайте диалог, а затем ответьте от лица пользователя, пожалуйста, постарайтесь отвечать адекватно.", - "instruction": "Напишите от лица пользователя", - "response_placeholder": "Напишите свой ответ..." - } + "label_assistant_reply": { + "label": "Оценить ответ Ассистента", + "desc": "Выберите подходящие метки для ответа Ассистента.", + "overview": "Прочтите диалог, а затем выберите подходящие метки для последнего ответа." + }, + "classify_initial_prompt": { + "label": "Оценить исходный запрос", + "desc": "Расставьте метки для запроса.", + "overview": "Прочитайте запрос, а затем ответьте на вопросы о нем." + }, + "classify_prompter_reply": { + "label": "Оценить ответ пользователя", + "desc": "Предоставьте метки для ответа.", + "overview": "Прочитайте следующую переписку, а затем ответьте на вопросы о последнем сообщении." + }, + "classify_assistant_reply": { + "label": "Оценить ответ Ассистента", + "desc": "Расставьте метки для запроса.", + "overview": "Прочитайте следующую переписку, а затем ответьте на вопросы о последнем сообщении." 
+ }, + "available_task_count": "Доступных заданий: {{count}}", + "writing_wrong_langauge_a_b": "Язык вашего текста определён как: {{detected_lang}}, но он будет отображаться как: {{submit_lang}}.", + "submitted_as": "Будет помечено как {{submit_lang}} язык" } diff --git a/website/public/locales/tr/common.json b/website/public/locales/tr/common.json index bd220d7c..5499c97c 100644 --- a/website/public/locales/tr/common.json +++ b/website/public/locales/tr/common.json @@ -6,22 +6,31 @@ "conversational": "Herkes için etkileşimli AI", "copied": "Kopyalandı", "dark_mode": "Karanlık Mod", + "dashboard_home": "Kontrol Paneli Ana Sayfa", "dashboard": "Kontrol Paneli", "delete": "Sil", "discord": "Discord", "docs": "Dokümantasyon", "github": "GitHub", + "leaderboard": "Lider Tablosu", "legal": "Yasal", "light_mode": "Açık Mod", "loading": "Yükleniyor...", + "messages_dashboard": "Mesaj Kontrol Paneli", + "messages": "Mesajlar", "more_information": "Daha Fazla Bilgi", "no": "No", "privacy_policy": "Gizlilik Politikası", "report_a_bug": "Hata Bildir", "sign_in": "Giriş Yap", "sign_out": "Çıkış Yap", + "status_dashboard": "Durum Kontrol Paneli", + "status": "Durum", "success": "Başarılı", "terms_of_service": "Kullanım Şartları", "title": "Open Assistant", + "user_leaderboard": "Kullanıcı Lider Tablosu", + "users_dashboard": "Kullanıcı Kontrol Paneli", + "users": "Kullanıcılar", "yes": "Evet" } diff --git a/website/public/locales/tr/side_menu.json b/website/public/locales/tr/side_menu.json deleted file mode 100644 index 8add2136..00000000 --- a/website/public/locales/tr/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Kontrol Paneli", - "dashboard_home": "Kontrol Paneli Ana Sayfa", - "leaderboard": "Lider Tablosu", - "messages": "Mesajlar", - "messages_dashboard": "Mesaj Kontrol Paneli", - "status": "Durum", - "status_dashboard": "Durum Kontrol Paneli", - "user_leaderboard": "Kullanıcı Lider Tablosu", - "users": "Kullanıcılar", - "users_dashboard": "Kullanıcı 
Kontrol Paneli" -} diff --git a/website/public/locales/uk-UA/common.json b/website/public/locales/uk-UA/common.json index 8f8829ea..4904b4e4 100644 --- a/website/public/locales/uk-UA/common.json +++ b/website/public/locales/uk-UA/common.json @@ -6,22 +6,31 @@ "conversational": "Розмовний ШІ для кожного.", "copied": "Скопійовано", "dark_mode": "Темний режим", - "dashboard": "Головна панель", + "dashboard_home": "Головна панель", + "dashboard": "Головна", "delete": "Видалити", "discord": "Discord", "docs": "Документація", "github": "GitHub", + "leaderboard": "Рейтинг лідерів", "legal": "Юридична інформація", "light_mode": "Світлий режим", "loading": "Завантаження...", + "messages_dashboard": "Панель повідомлень", + "messages": "Повідомлення", "more_information": "Більше інформації", "no": "Ні", "privacy_policy": "Політика конфіденційності", "report_a_bug": "Сповістити про помилку", "sign_in": "Війти", "sign_out": "Вийти", + "status_dashboard": "Панель статусів", + "status": "Статус", "success": "Успіх", "terms_of_service": "Умови використання", "title": "Open Assistant", + "user_leaderboard": "User Leaderboard", + "users_dashboard": "Панель користувачів", + "users": "Користувачі", "yes": "Так" } diff --git a/website/public/locales/uk-UA/side_menu.json b/website/public/locales/uk-UA/side_menu.json deleted file mode 100644 index 4111a0c7..00000000 --- a/website/public/locales/uk-UA/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "Головна", - "dashboard_home": "Головна панель", - "leaderboard": "Рейтинг лідерів", - "messages": "Повідомлення", - "messages_dashboard": "Панель повідомлень", - "status": "Статус", - "status_dashboard": "Панель статусів", - "user_leaderboard": "User Leaderboard", - "users": "Користувачі", - "users_dashboard": "Панель користувачів" -} diff --git a/website/public/locales/vi/common.json b/website/public/locales/vi/common.json index 9896e4c2..f02091b6 100644 --- a/website/public/locales/vi/common.json +++ 
b/website/public/locales/vi/common.json @@ -3,25 +3,35 @@ "account_settings": "Tài khoản", "admin_dashboard": "Trang cho admin", "connect": "Liên hệ", - "conversational": "Chatbot AI cho tất cả mọi người", - "copied": "Copied", + "conversational": "Chatbot AI cho mọi người", + "copied": "Đã sao chép", "dark_mode": "Giao diện tối", + "dashboard_home": "Trang chính", "dashboard": "Trang chính", "delete": "Xoá", "discord": "Discord", "docs": "Hướng dẫn", "github": "GitHub", + "leaderboard": "Bảng xếp hạng", "legal": "Luật lệ", "light_mode": "Giao diện sáng", "loading": "Đang tải...", + "messages_dashboard": "Trang tin nhắn", + "messages": "Tin nhắn", "more_information": "Xem thêm", "no": "Không", + "parameters": "Thông số", "privacy_policy": "Chính sách bảo mật", "report_a_bug": "Báo lỗi", "sign_in": "Đăng nhập", "sign_out": "Đăng xuất", - "success": "Success", - "terms_of_service": "Điều khoản sử dụng", + "status": "Tình trạng", + "status_dashboard": "Trang tình trạng", + "success": "Thành công", "title": "Open Assistant", + "trollboard": "Trang giám sát", + "user_leaderboard": "Bảng xếp hạng người dùng", + "users_dashboard": "Bảng xếp hạng người dùng", + "terms_of_service": "Điều khoản sử dụng", "yes": "Có" } diff --git a/website/public/locales/vi/index.json b/website/public/locales/vi/index.json index 43022145..5f2a8088 100644 --- a/website/public/locales/vi/index.json +++ b/website/public/locales/vi/index.json @@ -1,15 +1,23 @@ { "blurb": "Đây sẽ là cuộc cách mạng công nghệ mới.", - "blurb1": "Giống như cách Stable Diffusion đã cho mọi người công cụ để làm tranh ảnh bằng AI, Open Assistant sẽ làm như vậy với con chatbot mã nguồn mở mạnh nhất thế giới.", - "description": "Chatbot trí tuệ nhân tạo mã nguồn mở, dựa trên mô hình ngôn ngữ lớn của LAION và các tình nguyện viên trên toàn thế giới.", + "blurb1": "Giống như Stable Diffusion với tranh ảnh bằng AI, Open Assistant sẽ làm tương tự như vậy với con chatbot mã nguồn mở mạnh nhất thế giới.", + "description": 
"Chatbot trí tuệ nhân tạo mã nguồn mở, dựa trên mô hình ngôn ngữ lớn của LAION và các tình nguyện viên.", "faq_items": { - "q0": "Open Assistant bây giờ thế nào rồi?", - "a0": "Dự án này đang trong giai đoạn phát triển, từ những nghiên cứu về sử dụng RLHF (học từ phản hồi con người) trong các mô hình ngôn ngữ lớn.", + "q0": "Dự án Open Assistant bây giờ thế nào rồi?", + "a0": "Dự án này đang trong giai đoạn phát triển, từ những nghiên cứu về sử dụng RLHF (học từ phản hồi con người) trong trí tuệ nhân tạo.", "q1": "Open Assistant được phát triển bởi ai?", - "a1": "Open Assistant là dự ản được phát triển bởi LAION and các tình nguyện viên trên toàn thế giới." + "a1": "Open Assistant là dự án được phát triển bởi LAION và các tình nguyện viên.", + "q2": "Open Assistant sẽ sử dụng giấy phép gì?", + "a2": "Mã nguồn và phần mềm trí tuệ nhân tạo sẽ được phát hành dưới giấy phép Apache 2.0.", + "q3": "Khi nào thì tôi sẽ có dữ liệu?", + "a3": "Sắp, dữ liệu sẽ phát hành dưới giấy phép CC BY 4.0.", + "q4": "Tôi tải về Open Assistant được không?", + "a4": "Có, Open Assistant là phần mềm mã nguồn mở cho phép người dùng thích làm gì thì làm, miễn là theo giấy phép.", + "q5": "Máy tính phải mạnh đến mức nào thì mới chạy được Open Assistant?", + "a5": "Sẽ có phiên bản mà chạy được trên PC bình thường." }, "faq_title": "Câu hỏi", "join_us_description": "Các dự án mã nguồn mở được phát triển bởi những người như bạn. Triết lý mã nguồn mở là hợp tác để tạo và phát triển công nghệ mới mà làm giàu thế giới quanh ta. Bạn có muốn tham gia không? 
Liên hệ chúng tôi ở đây:", "join_us_title": "Tham gia", - "subtitle": "Chatbot AI cho tất cả mọi người" + "subtitle": "Chatbot AI cho mọi người" } diff --git a/website/public/locales/vi/leaderboard.json b/website/public/locales/vi/leaderboard.json index ebe25dda..d32ac203 100644 --- a/website/public/locales/vi/leaderboard.json +++ b/website/public/locales/vi/leaderboard.json @@ -1,18 +1,33 @@ { - "daily": "Ngày", - "label": "Nhãn", + "accepted": "↪ Vào bộ dữ liệu", + "accepted_prompts": "Câu đầu được vào bộ dữ liệu", + "daily": "Theo ngày", + "day": "Theo ngày", + "good_rankings": "Xếp hạng số lần like", + "label": "Số nhãn", + "labels_full": "Số nhãn đầy đủ", + "labels_simple": "Số nhãn đã lược giản", "last_updated_at": "Cập nhật lần cuối: {{val, datetime}}", "leaderboard": "Bảng xếp hạng", - "monthly": "Tháng", + "month": "Theo tháng", + "monthly": "Theo tháng", "next": "Tiếp", "overall": "Tổng quan", "previous": "Trước", - "prompt": "Câu đầu tiên", + "prompt": "Số câu đầu", "rank": "Xếp hạng", - "reply": "Câu trả lời", + "rankings": "Số lần sắp xếp", + "replies_assistant": "Số tin nhắn trợ lý", + "replies_prompter": "Số tin nhắn người dùng", + "reply": "Số câu trả lời", + "reply_ranked_1": "Số tin nhắn được đánh giá số 1", "score": "Điểm", "top_5_contributors_today": "Top 5 người đóng góp", + "total": "Tổng", "user": "Tên người dùng", "view_all": "Nhìn tất cả", - "weekly": "Tuần" + "week": "Theo tuần", + "weekly": "Theo tuần", + "your_account": "Tài khoản", + "your_stats": "Thống kê" } diff --git a/website/public/locales/vi/message.json b/website/public/locales/vi/message.json index baabe4b6..45d53060 100644 --- a/website/public/locales/vi/message.json +++ b/website/public/locales/vi/message.json @@ -1,20 +1,20 @@ { - "copy_message_id": "Copy message ID", + "copy_message_id": "Sao chép ID", "label_action": "Nhãn", "label_title": "Nhãn", "message": "Tin nhắn", - "message_deleted": "Message deleted", + "message_deleted": "Tin nhắn đã xoá", "open_new_tab_action": 
"Mở ở trang mới", "parent": "Tin nhắn gốc", "reactions": "Bình luận", "recent_messages": "Tin nhắn gần đây", "report_action": "Báo cáo", - "report_placeholder": "Tại sao tin nhắn này cần được báo cáo?", + "report_placeholder": "Nêu lý do để báo cáo", "report_title": "Báo cáo", "send_report": "Gửi", - "stop_tree": "Stop tree", + "stop_tree": "Dừng nhánh tin nhắn", "submit_labels": "Gửi", - "tree_stopped": "Tree stopped {{id}}", + "tree_stopped": "Nhánh tin nhắn {{id}} đã dừng", "view_user": "Xem người dùng", "your_recent_messages": "Tin nhắn gần đây của bạn" } diff --git a/website/public/locales/vi/tasks.json b/website/public/locales/vi/tasks.json index e197c410..17d00fd1 100644 --- a/website/public/locales/vi/tasks.json +++ b/website/public/locales/vi/tasks.json @@ -1,22 +1,21 @@ { - "available_task_count": "{{count}} việc", "classify_assistant_reply": { - "label": "Phân loại tin nhắn của Open Assistant", - "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn.", + "label": "Phân loại các tin nhắn của Open Assistant", + "desc": "Tạo nhãn dữ liệu để đánh giá tin nhắn.", "overview": "Từ cuộc trò truyện ở dưới, trả lời các câu hỏi về câu trả lời cuối trong cuộc trò truyện." }, "classify_initial_prompt": { - "label": "Phân loại tin nhắn đầu", - "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn.", + "label": "Phân loại các tin nhắn đầu tiên", + "desc": "Tạo nhãn dữ liệu để đánh giá tin nhắn.", "overview": "Đọc tin nhắn đầu và trả lời các câu hỏi." }, "classify_prompter_reply": { "label": "Phân loại tin nhắn người dùng", - "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn.", + "desc": "Tạo nhãn dữ liệu để đánh giá tin nhắn.", "overview": "Từ cuộc trò truyện ở dưới, trả lời các câu hỏi về câu trả lời cuối trong cuộc trò truyện." 
}, "create_initial_prompt": { - "label": "Tạo tin nhắn đầu", + "label": "Tạo tin nhắn đầu tiên", "desc": "Viết tin nhắn đầu tiên để làm bộ dữ liệu cho Open Assistant.", "overview": "Viết tin nhắn đầu tiên để Open Assistant trả lời", "instruction": "Viết tin nhắn đầu", @@ -27,17 +26,17 @@ "unchanged_message": "Are you sure you would like to continue?" }, "label_assistant_reply": { - "label": "Tạo nhãn cho tin nhắn của Open Assistant", + "label": "Đánh giá các tin nhắn của Open Assistant", "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn của Open Assistant.", "overview": "Từ cuộc trò truyện ở dưới, tạo nhãn dữ liệu cho tin nhắn sau." }, "label_initial_prompt": { - "label": "Tạo nhãn cho tin nhắn đầu", + "label": "Đánh giá các tin nhắn đầu tiên", "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn đầu.", "overview": "Tạo nhãn dữ liệu cho tin nhắn sau." }, "label_prompter_reply": { - "label": "Tạo nhãn cho tin nhắn người dùng", + "label": "Đánh giá các tin nhắn người dùng", "desc": "Tạo nhãn dữ liệu đánh giá tin nhắn của người dùng.", "overview": "Từ cuộc trò truyện ở dưới, tạo nhãn dữ liệu cho tin nhắn sau." }, @@ -46,28 +45,28 @@ "desc": "Giúp cải thiện Open Assistant bằng cách làm một việc ngẫu nhiên." }, "rank_assistant_replies": { - "label": "Xếp hạng câu trả lời của Open Assistant", + "label": "Xếp hạng các câu trả lời của Open Assistant", "desc": "Đánh giá độ chính xác và dễ đọc của các câu trả lời mà Open Assistant đưa ra.", "overview": "Từ những câu trả lời của Open Assistant, xếp hạng chúng theo chất lượng, tốt nhât ở trên, tệ nhất ở dưới.", "unchanged_title": "Chưa thay đổi thứ tự", "unchanged_message": "Bạn chưa thay đổi thứ tự tin nhắn. Bạn có chắc muốn lưu không?" 
}, "rank_initial_prompts": { - "label": "Xếp hạng tin nhắn đầu tiên", + "label": "Xếp hạng các tin nhắn đầu tiên", "desc": "Đánh giá độ chính xác và dễ đọc của các câu trả lời của tin nhắn đầu tiên.", "overview": "Từ những tin nhắn đầu sau, xếp hạng chúng theo chất lượng, tốt nhât ở trên, tệ nhất ở dưới.", "unchanged_title": "Chưa thay đổi thứ tự", "unchanged_message": "Bạn chưa thay đổi thứ tự tin nhắn. Bạn có chắc muốn lưu không?" }, "rank_user_replies": { - "label": "Xếp hạng câu trả lời của người dùng", + "label": "Xếp hạng các câu trả lời của người dùng", "desc": "Giúp cải thiện câu trả lời của Open Assistant.", "overview": "Từ những câu trả lời của người dùng, xếp hạng chúng theo chất lượng, tốt nhât ở trên, tệ nhất ở dưới.", "unchanged_title": "Chưa thay đổi thứ tự", "unchanged_message": "Bạn chưa thay đổi thứ tự tin nhắn. Bạn có chắc muốn lưu không?" }, "reply_as_assistant": { - "label": "Đóng vai Open Assistant", + "label": "Đóng vai trợ lý", "desc": "Giúp cải thiện câu trả lời của Open Assistant.", "overview": "Tạo câu trả lời phù hợp cho cuộc trò truyện dưới đây", "response_placeholder": "Viết vào đây..." @@ -78,5 +77,8 @@ "overview": "Tạo câu trả lời phù hợp cho cuộc trò truyện dưới đây", "instruction": "Viết tin nhắn trả lời", "response_placeholder": "Viết vào đây..." 
- } + }, + "available_task_count": "{{count}} việc", + "writing_wrong_langauge_a_b": "Tin nhắn có vẻ được viết trong ngôn ngữ {{detected_lang}} nhưng sẽ được gửi đi là ngôn ngữ {{submit_lang}}.", + "submitted_as": "Câu trả lời sẽ được gửi đi là ngôn ngữ {{submit_lang}}" } diff --git a/website/public/locales/zh/common.json b/website/public/locales/zh/common.json index bba05485..3776f825 100644 --- a/website/public/locales/zh/common.json +++ b/website/public/locales/zh/common.json @@ -6,22 +6,31 @@ "conversational": "每个人的对话式人工智能。", "copied": "已复制", "dark_mode": "深色模式", + "dashboard_home": "概览首页", "dashboard": "概览", "delete": "删除", "discord": "Discord", "docs": "文档", "github": "GitHub", + "leaderboard": "排行榜", "legal": "法律信息", "light_mode": "浅色模式", "loading": "载入中...", + "messages_dashboard": "信息概览", + "messages": "信息", "more_information": "更多信息", "no": "否", "privacy_policy": "隐私政策", "report_a_bug": "报告问题", "sign_in": "登入", "sign_out": "登出", + "status_dashboard": "状态概览", + "status": "状态", "success": "成功", "terms_of_service": "服务条款", "title": "Open Assistant", + "user_leaderboard": "用户排行榜", + "users_dashboard": "用户概览", + "users": "用户", "yes": "是" } diff --git a/website/public/locales/zh/side_menu.json b/website/public/locales/zh/side_menu.json deleted file mode 100644 index fbb69bd1..00000000 --- a/website/public/locales/zh/side_menu.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "dashboard": "概览", - "dashboard_home": "概览首页", - "messages": "信息", - "messages_dashboard": "信息概览", - "leaderboard": "排行榜", - "user_leaderboard": "用户排行榜", - "users": "用户", - "users_dashboard": "用户概览", - "status": "状态", - "status_dashboard": "状态概览" -} diff --git a/website/src/components/AdminArea.tsx b/website/src/components/AdminArea.tsx index 8e6d1229..6bc7a919 100644 --- a/website/src/components/AdminArea.tsx +++ b/website/src/components/AdminArea.tsx @@ -10,9 +10,12 @@ export const AdminArea = ({ children }: { children: ReactNode }) => { if (status === "loading") { return; } - if 
(session?.user.role === "admin") { + const role = session?.user.role; + + if (role === "admin" || role === "moderator") { return; } + router.push("/"); }, [router, session, status]); return
{status === "loading" ? "loading..." : children}
; diff --git a/website/src/components/DataTable.tsx b/website/src/components/DataTable/DataTable.tsx similarity index 90% rename from website/src/components/DataTable.tsx rename to website/src/components/DataTable/DataTable.tsx index 35246bf7..3e75f32e 100644 --- a/website/src/components/DataTable.tsx +++ b/website/src/components/DataTable/DataTable.tsx @@ -23,13 +23,23 @@ import { Tr, useDisclosure, } from "@chakra-ui/react"; -import { Cell, ColumnDef, flexRender, getCoreRowModel, Row, useReactTable } from "@tanstack/react-table"; +import { + Cell, + ColumnDef, + ExpandedState, + flexRender, + getCoreRowModel, + getExpandedRowModel, + Row, + useReactTable, +} from "@tanstack/react-table"; import { Filter } from "lucide-react"; import { useTranslation } from "next-i18next"; -import { ChangeEvent, ReactNode } from "react"; +import { ChangeEvent, ReactNode, useState } from "react"; import { useDebouncedCallback } from "use-debounce"; -export type DataTableColumnDef = ColumnDef & { +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export type DataTableColumnDef = ColumnDef & { filterable?: boolean; span?: number | ((cell: Cell) => number | undefined); }; @@ -54,6 +64,7 @@ export type DataTableProps = { disablePrevious?: boolean; disablePagination?: boolean; rowProps?: TableRowProps | DataTableRowPropsCallback; + getSubRows?: (row: T) => T[] | undefined; }; export const DataTable = ({ @@ -68,12 +79,21 @@ export const DataTable = ({ disablePrevious, disablePagination, rowProps, + getSubRows, }: DataTableProps) => { const { t } = useTranslation("leaderboard"); + const [expanded, setExpanded] = useState({}); + const { getHeaderGroups, getRowModel } = useReactTable({ data, columns, getCoreRowModel: getCoreRowModel(), + getExpandedRowModel: getExpandedRowModel(), + state: { + expanded, + }, + getSubRows, + onExpandedChange: setExpanded, }); const handleFilterChange = (value: FilterItem) => { diff --git a/website/src/components/DataTable/DataTableAction.tsx 
b/website/src/components/DataTable/DataTableAction.tsx new file mode 100644 index 00000000..de4f0b72 --- /dev/null +++ b/website/src/components/DataTable/DataTableAction.tsx @@ -0,0 +1,10 @@ +import { forwardRef, IconButton, IconButtonProps } from "@chakra-ui/react"; +import { LucideIcon } from "lucide-react"; + +export type DataTableActionProps = Omit & { icon: LucideIcon }; + +// need to use forwardRef from Chakra to support `as` props +// https://chakra-ui.com/community/recipes/as-prop +export const DataTableAction = forwardRef((props: DataTableActionProps, ref) => { + return } ref={ref} />; +}); diff --git a/website/src/components/DataTable/jsonExpandRowModel.tsx b/website/src/components/DataTable/jsonExpandRowModel.tsx new file mode 100644 index 00000000..84c70e20 --- /dev/null +++ b/website/src/components/DataTable/jsonExpandRowModel.tsx @@ -0,0 +1,60 @@ +import { Card, CardBody, Flex } from "@chakra-ui/react"; +import { Cell, CellContext } from "@tanstack/react-table"; +import { ChevronDown, ChevronRight } from "lucide-react"; + +type ExpandableRow = Omit & { + shouldExpand?: boolean; +}; + +export const createJsonExpandRowModel = () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const renderCell = ({ row, getValue }: CellContext, any>) => { + if (!row.original.shouldExpand) { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const { shouldExpand, ...res } = row.original; + return ( + + +
{JSON.stringify(res, null, 2)}
+
+
+ ); + } + + return ( + + {row.getCanExpand() ? ( + + ) : null}{" "} + {getValue()} + + ); + }; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const span = (cell: Cell, any>) => + cell.row.original.shouldExpand ? undefined : cell.row.getVisibleCells().length; + + const getSubRows = (row: ExpandableRow) => + row.shouldExpand + ? [ + { + ...row, + shouldExpand: false, + }, + ] + : undefined; + + const toExpandable = function (arr: T[] | undefined, val = true): ExpandableRow[] { + return !arr ? [] : arr.map((element) => ({ ...element, shouldExpand: val })); + }; + + return { renderCell, span, getSubRows, toExpandable }; +}; diff --git a/website/src/components/DataTable/useCursorPagination.ts b/website/src/components/DataTable/useCursorPagination.ts new file mode 100644 index 00000000..28e4396e --- /dev/null +++ b/website/src/components/DataTable/useCursorPagination.ts @@ -0,0 +1,40 @@ +import { useState } from "react"; + +export interface CursorPaginationState { + /** + * The user's `display_name` used for pagination. + */ + cursor: string; + + /** + * The pagination direction. 
+ */ + direction: "forward" | "back"; +} + +export const useCursorPagination = () => { + const [pagination, setPagination] = useState({ cursor: "", direction: "forward" }); + + const toPreviousPage = (data: undefined | { prev?: string; next?: string }) => { + setPagination({ + cursor: data?.prev || "", + direction: "back", + }); + }; + + const toNextPage = (data: undefined | { prev?: string; next?: string }) => { + setPagination({ + cursor: data?.next || "", + direction: "forward", + }); + }; + + const resetCursor = () => setPagination((old) => ({ ...old, cursor: "" })); + + return { + pagination, + toNextPage, + toPreviousPage, + resetCursor, + }; +}; diff --git a/website/src/components/Header/UserMenu.tsx b/website/src/components/Header/UserMenu.tsx index 8b5de035..aba03f05 100644 --- a/website/src/components/Header/UserMenu.tsx +++ b/website/src/components/Header/UserMenu.tsx @@ -1,5 +1,6 @@ import { Avatar, + Badge, Box, Link, Menu, @@ -16,6 +17,7 @@ import NextLink from "next/link"; import { signOut, useSession } from "next-auth/react"; import { useTranslation } from "next-i18next"; import React, { ElementType, useCallback } from "react"; +import { useHasAnyRole } from "src/hooks/auth/useHasAnyRole"; interface MenuOption { name: string; @@ -31,7 +33,7 @@ export function UserMenu() { signOut({ callbackUrl: "/" }); }, []); const { data: session, status } = useSession(); - + const isAdminOrMod = useHasAnyRole(["admin", "moderator"]); if (!session || status !== "authenticated") { return null; } @@ -56,7 +58,7 @@ export function UserMenu() { }, ]; - if (session.user.role === "admin") { + if (isAdminOrMod) { options.unshift({ name: t("admin_dashboard"), href: "/admin", @@ -69,7 +71,7 @@ export function UserMenu() { - + {session.user.name || "New User"} @@ -77,7 +79,14 @@ export function UserMenu() { - {session.user.name} + + {session.user.name} + {isAdminOrMod ? 
( + + {session.user.role} + + ) : null} + {/* 3,200 */} diff --git a/website/src/components/JsonCard.tsx b/website/src/components/JsonCard.tsx new file mode 100644 index 00000000..973977b4 --- /dev/null +++ b/website/src/components/JsonCard.tsx @@ -0,0 +1,12 @@ +import { Card, CardBody } from "@chakra-ui/card"; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +export const JsonCard = ({ children }: { children: any }) => { + return ( + + +
{JSON.stringify(children, null, 2)}
+
+
+ ); +}; diff --git a/website/src/components/LanguageSelector/LanguageSelector.tsx b/website/src/components/LanguageSelector/LanguageSelector.tsx index 4ed9ede9..06a4096b 100644 --- a/website/src/components/LanguageSelector/LanguageSelector.tsx +++ b/website/src/components/LanguageSelector/LanguageSelector.tsx @@ -3,6 +3,7 @@ import { useRouter } from "next/router"; import { useTranslation } from "next-i18next"; import { useCallback, useEffect, useMemo } from "react"; import { useCookies } from "react-cookie"; +import { getLocaleDisplayName } from "src/lib/languages"; const LanguageSelector = () => { const router = useRouter(); @@ -20,15 +21,11 @@ const LanguageSelector = () => { } }, [cookies, setCookie, router]); - const firstLetterUppercase = (str) => { - return str.charAt(0).toLocaleUpperCase() + str.slice(1); - }; - // Memo the set of locales and their display names. const localesAndNames = useMemo(() => { return router.locales.map((locale) => ({ locale, - name: firstLetterUppercase(new Intl.DisplayNames([locale], { type: "language" }).of(locale)), + name: getLocaleDisplayName(locale), })); }, [router.locales]); diff --git a/website/src/components/LeaderboardTable/LeaderboardTable.tsx b/website/src/components/LeaderboardTable/LeaderboardTable.tsx index d59fc902..95cbd376 100644 --- a/website/src/components/LeaderboardTable/LeaderboardTable.tsx +++ b/website/src/components/LeaderboardTable/LeaderboardTable.tsx @@ -2,19 +2,20 @@ import { Box, CircularProgress, Flex, Link, useColorModeValue } from "@chakra-ui import { createColumnHelper } from "@tanstack/react-table"; import { MoreHorizontal } from "lucide-react"; import NextLink from "next/link"; -import { useSession } from "next-auth/react"; import { useTranslation } from "next-i18next"; import React, { useMemo } from "react"; +import { useHasAnyRole } from "src/hooks/auth/useHasAnyRole"; import { LeaderboardEntity, LeaderboardReply, LeaderboardTimeFrame } from "src/types/Leaderboard"; -import { DataTable, 
DataTableColumnDef } from "../DataTable"; +import { DataTable, DataTableColumnDef } from "../DataTable/DataTable"; +import { createJsonExpandRowModel } from "../DataTable/jsonExpandRowModel"; import { useBoardPagination } from "./useBoardPagination"; import { useBoardRowProps } from "./useBoardRowProps"; import { useFetchBoard } from "./useFetchBoard"; type WindowLeaderboardEntity = LeaderboardEntity & { isSpaceRow?: boolean }; const columnHelper = createColumnHelper(); - +const jsonExpandRowModel = createJsonExpandRowModel(); /** * Presents a grid of leaderboard entries with more detailed information. */ @@ -39,22 +40,29 @@ export const LeaderboardTable = ({ } = useFetchBoard( `/api/leaderboard?time_frame=${timeFrame}&limit=${limit}&includeUserStats=${!hideCurrentUserRanking}` ); - const { data: session } = useSession(); - const isAdmin = session?.user?.role === "admin"; + const isAdminOrMod = useHasAnyRole(["admin", "moderator"]); + const columns: DataTableColumnDef[] = useMemo( () => [ { ...columnHelper.accessor("rank", { header: t("rank"), - cell: ({ row, getValue }) => (row.original.isSpaceRow ? : getValue()), + cell: (ctx) => + ctx.row.original.isSpaceRow ? ( + + ) : isAdminOrMod ? ( + jsonExpandRowModel.renderCell(ctx) + ) : ( + ctx.getValue() + ), }), - span: (cell) => (cell.row.original.isSpaceRow ? 6 : undefined), + span: (cell) => (cell.row.original.isSpaceRow ? 6 : jsonExpandRowModel.span(cell)), }, columnHelper.accessor("display_name", { header: t("user"), cell: ({ getValue, row }) => - isAdmin ? ( + isAdminOrMod ? 
( {getValue()} @@ -75,24 +83,24 @@ export const LeaderboardTable = ({ header: t("label"), }), ], - [isAdmin, t] + [isAdminOrMod, t] ); const { data: paginatedData, end, ...pagnationProps - } = useBoardPagination({ rowPerPage, data: reply?.leaderboard, limit }); - const data: WindowLeaderboardEntity[] = useMemo(() => { - if (hideCurrentUserRanking || !reply?.user_stats_window) { + } = useBoardPagination({ rowPerPage, data: jsonExpandRowModel.toExpandable(reply?.leaderboard || []), limit }); + const data = useMemo(() => { + if (hideCurrentUserRanking || !reply?.user_stats_window || reply.user_stats_window.length === 0) { return paginatedData; } - const userStatsWindow: WindowLeaderboardEntity[] = reply.user_stats_window; + const userStatsWindow: WindowLeaderboardEntity[] = jsonExpandRowModel.toExpandable(reply.user_stats_window); const userStats = userStatsWindow.find((stats) => stats.highlighted); if (userStats && userStats.rank > end) { paginatedData.push( { isSpaceRow: true } as WindowLeaderboardEntity, - ...reply.user_stats_window.filter( + ...userStatsWindow.filter( (stats) => paginatedData.findIndex((leaderBoardEntity) => leaderBoardEntity.user_id === stats.user_id) === -1 ) // filter to avoid duplicated row ); @@ -116,6 +124,7 @@ export const LeaderboardTable = ({ columns={columns} caption={lastUpdated} rowProps={rowProps} + getSubRows={jsonExpandRowModel.getSubRows} {...pagnationProps} > ); diff --git a/website/src/components/LeaderboardTable/TrollboardTable.tsx b/website/src/components/LeaderboardTable/TrollboardTable.tsx index 1b1ea118..0c971655 100644 --- a/website/src/components/LeaderboardTable/TrollboardTable.tsx +++ b/website/src/components/LeaderboardTable/TrollboardTable.tsx @@ -1,26 +1,42 @@ -import { Box, CircularProgress, Flex, Link } from "@chakra-ui/react"; +import { Box, CircularProgress, Flex, IconButton, Link, Tooltip } from "@chakra-ui/react"; import { createColumnHelper } from "@tanstack/react-table"; -import { ThumbsDown, ThumbsUp } from 
"lucide-react"; +import { Mail, ThumbsDown, ThumbsUp, User } from "lucide-react"; import NextLink from "next/link"; import { FetchTrollBoardResponse, TrollboardEntity, TrollboardTimeFrame } from "src/types/Trollboard"; -import { DataTable } from "../DataTable"; +import { DataTable, DataTableColumnDef } from "../DataTable/DataTable"; +import { createJsonExpandRowModel } from "../DataTable/jsonExpandRowModel"; +import { Discord } from "../Icons/Discord"; import { useBoardPagination } from "./useBoardPagination"; import { useBoardRowProps } from "./useBoardRowProps"; import { useFetchBoard } from "./useFetchBoard"; + const columnHelper = createColumnHelper(); - const toPercentage = (num: number) => `${Math.round(num * 100)}%`; +const jsonExpandRowModel = createJsonExpandRowModel(); -const columns = [ - columnHelper.accessor("rank", {}), +const columns: DataTableColumnDef[] = [ + { + ...columnHelper.accessor("rank", { + cell: jsonExpandRowModel.renderCell, + }), + span: jsonExpandRowModel.span, + }, columnHelper.accessor("display_name", { header: "Display name", - cell: ({ getValue, row }) => ( - - {getValue()} - - ), + cell: ({ getValue, row }) => { + const isEmail = row.original.auth_method === "local"; + return ( + + + {getValue()} + + + {isEmail ? 
: } + + + ); + }, }), columnHelper.accessor("troll_score", { header: "Troll score", @@ -45,36 +61,19 @@ const columns = [ columnHelper.accessor((row) => row.spam + row.spam_prompts, { header: "Spam", }), - columnHelper.accessor("lang_mismach", { - header: "Lang mismach", - }), - columnHelper.accessor("not_appropriate", { - header: "Not appropriate", - }), - columnHelper.accessor("pii", {}), - columnHelper.accessor("hate_speech", { - header: "Hate speech", - }), - columnHelper.accessor("sexual_content", { - header: "Sexual Content", - }), - columnHelper.accessor("political_content", { - header: "Political Content", - }), - columnHelper.accessor("quality", { - cell: ({ getValue }) => toPercentage(getValue()), - }), - columnHelper.accessor("helpfulness", { - cell: ({ getValue }) => toPercentage(getValue()), - }), - columnHelper.accessor("humor", { - cell: ({ getValue }) => toPercentage(getValue()), - }), - columnHelper.accessor("violence", { - cell: ({ getValue }) => toPercentage(getValue()), - }), columnHelper.accessor("toxicity", { - cell: ({ getValue }) => toPercentage(getValue()), + cell: ({ getValue }) => toPercentage(getValue() || 0), + }), + columnHelper.accessor((row) => row.user_id, { + header: "Actions", + cell: ({ row }) => ( + } + > + ), }), ]; @@ -94,7 +93,11 @@ export const TrollboardTable = ({ lastUpdated, } = useFetchBoard(`/api/admin/trollboard?time_frame=${timeFrame}&limit=${limit}`); - const { data, ...paginationProps } = useBoardPagination({ rowPerPage, data: trollboardRes?.trollboard, limit }); + const { data, ...paginationProps } = useBoardPagination({ + rowPerPage, + data: jsonExpandRowModel.toExpandable(trollboardRes?.trollboard), + limit, + }); const rowProps = useBoardRowProps(); if (isLoading) { return ; @@ -112,11 +115,12 @@ export const TrollboardTable = ({ }, }} > - +
diff --git a/website/src/components/LeaderboardTable/useBoardRowProps.ts b/website/src/components/LeaderboardTable/useBoardRowProps.ts index 32f3fc56..be0fe7de 100644 --- a/website/src/components/LeaderboardTable/useBoardRowProps.ts +++ b/website/src/components/LeaderboardTable/useBoardRowProps.ts @@ -2,7 +2,7 @@ import { useColorModeValue, useToken } from "@chakra-ui/react"; import { useCallback } from "react"; import { colors } from "src/styles/Theme/colors"; -import { DataTableRowPropsCallback } from "../DataTable"; +import { DataTableRowPropsCallback } from "../DataTable/DataTable"; export const useBoardRowProps = () => { const borderColor = useToken("colors", useColorModeValue(colors.light.active, colors.dark.active)); diff --git a/website/src/components/Messages/AdminMessageTable.tsx b/website/src/components/Messages/AdminMessageTable.tsx new file mode 100644 index 00000000..90866459 --- /dev/null +++ b/website/src/components/Messages/AdminMessageTable.tsx @@ -0,0 +1,110 @@ +import { Avatar } from "@chakra-ui/avatar"; +import { Badge, Flex } from "@chakra-ui/layout"; +import { Tooltip } from "@chakra-ui/react"; +import { createColumnHelper } from "@tanstack/table-core"; +import { formatDistanceToNow, formatISO9075 } from "date-fns"; +import { Eye } from "lucide-react"; +import NextLink from "next/link"; +import { ROUTES } from "src/lib/routes"; +import { Message } from "src/types/Conversation"; +import { isKnownEmoji } from "src/types/Emoji"; +import { StrictOmit } from "src/types/utils"; + +import { DataTable, DataTableProps } from "../DataTable/DataTable"; +import { DataTableAction } from "../DataTable/DataTableAction"; +import { MessageEmojiButton } from "./MessageEmojiButton"; + +const columnHelper = createColumnHelper(); + +const columns = [ + columnHelper.accessor("text", { + cell: ({ getValue, row }) => { + const limit = 80; + const text = getValue(); + const renderText = text.length > limit ? 
`${text.slice(0, limit)}...` : text; + return ( + + + {renderText} + {row.original.deleted && ( + + Deleted + + )} + + ); + }, + }), + columnHelper.accessor("lang", { + header: "Language", + cell: ({ getValue }) => {getValue()}, + }), + columnHelper.accessor("emojis", { + header: "Reactions", + cell: ({ getValue, row }) => { + const emojis = getValue(); + + emojis["+1"] = emojis["+1"] || 0; + emojis["-1"] = emojis["-1"] || 0; + + return ( + + {Object.entries(emojis) + .filter(([emoji]) => isKnownEmoji(emoji)) + .sort(([emoji]) => -emoji) + .map(([emoji, count]) => { + return ( + + ); + })} + + ); + }, + }), + columnHelper.accessor("created_date", { + header: "Date", + cell: ({ getValue }) => { + return {getValue()}; + }, + }), + columnHelper.accessor((row) => row.id, { + header: "Actions", + cell: ({ getValue }) => ( + + ), + }), +]; +// TODO move this to somewhere +const DateDiff = ({ children }: { children: string | Date | number }) => { + const date = new Date(children); + const diff = formatDistanceToNow(date, { addSuffix: true }); + return ( + + {diff} + + ); +}; + +export const AdminMessageTable = (props: StrictOmit, "columns">) => { + return ; +}; diff --git a/website/src/components/Messages/LabelFlagGroup.tsx b/website/src/components/Messages/LabelFlagGroup.tsx index 7e135d24..7598627e 100644 --- a/website/src/components/Messages/LabelFlagGroup.tsx +++ b/website/src/components/Messages/LabelFlagGroup.tsx @@ -2,6 +2,7 @@ import { Button, Flex, Tooltip } from "@chakra-ui/react"; import { useTranslation } from "next-i18next"; import { useCookies } from "react-cookie"; import { getTypeSafei18nKey } from "src/lib/i18n"; +import { getLocaleDisplayName } from "src/lib/languages"; interface LabelFlagGroupProps { values: number[]; @@ -21,7 +22,7 @@ export const LabelFlagGroup = ({ const { t } = useTranslation("labelling"); const [cookies] = useCookies(["NEXT_LOCALE"]); const currentLanguage = cookies["NEXT_LOCALE"]; - const expectedLanguageName = new 
Intl.DisplayNames(currentLanguage, { type: "language" }).of(expectedLanguage); + const expectedLanguageName = getLocaleDisplayName(expectedLanguage, currentLanguage); return ( {labelNames.map((name, idx) => ( diff --git a/website/src/components/Messages/MessageTable.stories.tsx b/website/src/components/Messages/MessageConversation.stories.tsx similarity index 96% rename from website/src/components/Messages/MessageTable.stories.tsx rename to website/src/components/Messages/MessageConversation.stories.tsx index 772d1b52..c6d6acb3 100644 --- a/website/src/components/Messages/MessageTable.stories.tsx +++ b/website/src/components/Messages/MessageConversation.stories.tsx @@ -2,12 +2,12 @@ import { SessionProvider } from "next-auth/react"; import React from "react"; import { Message } from "src/types/Conversation"; -import { MessageTable } from "./MessageTable"; +import { MessageConversation } from "./MessageConversation"; // eslint-disable-next-line import/no-anonymous-default-export export default { title: "Messages/MessageTable", - component: MessageTable, + component: MessageConversation, }; const Template = ({ @@ -21,7 +21,7 @@ const Template = ({ }) => { return ( - ; + ; ); }; diff --git a/website/src/components/Messages/MessageTable.tsx b/website/src/components/Messages/MessageConversation.tsx similarity index 79% rename from website/src/components/Messages/MessageTable.tsx rename to website/src/components/Messages/MessageConversation.tsx index 2d39f346..4efdfb45 100644 --- a/website/src/components/Messages/MessageTable.tsx +++ b/website/src/components/Messages/MessageConversation.tsx @@ -2,13 +2,13 @@ import { Stack } from "@chakra-ui/react"; import { MessageTableEntry } from "src/components/Messages/MessageTableEntry"; import { Message } from "src/types/Conversation"; -interface MessageTableProps { +interface MessageConversationProps { messages: Message[]; enableLink?: boolean; highlightLastMessage?: boolean; } -export function MessageTable({ messages, 
enableLink, highlightLastMessage }: MessageTableProps) { +export function MessageConversation({ messages, enableLink, highlightLastMessage }: MessageConversationProps) { return ( {messages.map((message, idx) => ( diff --git a/website/src/components/Messages/MessageEmojiButton.stories.tsx b/website/src/components/Messages/MessageEmojiButton.stories.tsx index b083c966..5d3e8be6 100644 --- a/website/src/components/Messages/MessageEmojiButton.stories.tsx +++ b/website/src/components/Messages/MessageEmojiButton.stories.tsx @@ -11,17 +11,16 @@ export default { const Template = ({ emoji, count, - checked, - showCount, + ...rest }: { emoji: string; count: number; checked?: boolean; - showCount: boolean; + userIsAuthor: boolean; + disabled?: boolean; + userReacted: boolean; }) => { - return ( - - ); + return ; }; export const Default = Template.bind({}); @@ -29,7 +28,9 @@ Default.args = { emoji: "+1", count: 7, checked: false, - showCount: true, + userIsAuthor: false, + disabled: false, + userReacted: true, }; export const BigNumber = Template.bind({}); diff --git a/website/src/components/Messages/MessageEmojiButton.tsx b/website/src/components/Messages/MessageEmojiButton.tsx index f140a789..2c0c2442 100644 --- a/website/src/components/Messages/MessageEmojiButton.tsx +++ b/website/src/components/Messages/MessageEmojiButton.tsx @@ -1,17 +1,35 @@ -import { Button } from "@chakra-ui/react"; +import { Button, ButtonProps } from "@chakra-ui/react"; +import { useHasAnyRole } from "src/hooks/auth/useHasAnyRole"; import { MessageEmoji } from "src/types/Conversation"; import { emojiIcons } from "src/types/Emoji"; interface MessageEmojiButtonProps { emoji: MessageEmoji; checked?: boolean; - onClick: () => void; - showCount: boolean; + onClick?: () => void; + userIsAuthor: boolean; + disabled?: boolean; + userReacted: boolean; + sx?: ButtonProps["sx"]; } -export const MessageEmojiButton = ({ emoji, checked, onClick, showCount }: MessageEmojiButtonProps) => { +export const 
MessageEmojiButton = ({ + emoji, + checked, + onClick, + userIsAuthor, + disabled, + userReacted, + sx, +}: MessageEmojiButtonProps) => { const EmojiIcon = emojiIcons.get(emoji.name); - if (!EmojiIcon) return <>; + const isAdminOrMod = useHasAnyRole(["admin", "moderator"]); + + if (!EmojiIcon) return null; + + const isDisabled = !!(userIsAuthor ? true : disabled); + const showCount = (emoji.count > 0 && userReacted) || userIsAuthor || isAdminOrMod; + return ( ); }; diff --git a/website/src/components/Messages/MessageTableEntry.tsx b/website/src/components/Messages/MessageTableEntry.tsx index a82cc50c..d9686786 100644 --- a/website/src/components/Messages/MessageTableEntry.tsx +++ b/website/src/components/Messages/MessageTableEntry.tsx @@ -22,7 +22,7 @@ import { useCallback, useEffect, useMemo, useState } from "react"; import { LabelMessagePopup } from "src/components/Messages/LabelPopup"; import { MessageEmojiButton } from "src/components/Messages/MessageEmojiButton"; import { ReportPopup } from "src/components/Messages/ReportPopup"; -import { useHasRole } from "src/hooks/auth/useHasRole"; +import { useHasAnyRole } from "src/hooks/auth/useHasAnyRole"; import { del, post, put } from "src/lib/api"; import { colors } from "src/styles/Theme/colors"; import { Message, MessageEmojis } from "src/types/Conversation"; @@ -116,7 +116,8 @@ export function MessageTableEntry({ message, enabled, highlight }: MessageTableE key={emoji} emoji={{ name: emoji, count }} checked={emojiState.user_emojis.includes(emoji)} - showCount={emojiState.user_emojis.filter((emoji) => emoji === "+1" || emoji === "-1").length > 0} + userReacted={emojiState.user_emojis.length > 0} + userIsAuthor={message.user_is_author} onClick={() => react(emoji, !emojiState.user_emojis.includes(emoji))} /> ); @@ -209,7 +210,7 @@ const MessageActions = ({ }); }; - const isAdmin = useHasRole("admin"); + const isAdminOrMod = useHasAnyRole(["admin", "moderator"]); return ( @@ -237,12 +238,12 @@ const MessageActions = 
({ handleCopy(`${window.location.protocol}://${window.location.host}/messages/${id}`)} + onClick={() => handleCopy(`${window.location.protocol}//${window.location.host}/messages/${id}`)} icon={} > {t("copy_message_link")} - {!!isAdmin && ( + {!!isAdminOrMod && ( <> handleCopy(id)} icon={}> diff --git a/website/src/components/RoleSelect.tsx b/website/src/components/RoleSelect.tsx index d39d3868..970ffdce 100644 --- a/website/src/components/RoleSelect.tsx +++ b/website/src/components/RoleSelect.tsx @@ -2,7 +2,7 @@ import { Select, SelectProps } from "@chakra-ui/react"; import { forwardRef } from "react"; import { ElementOf } from "src/types/utils"; -export const roles = ["general", "admin", "banned"] as const; +export const roles = ["general", "admin", "banned", "moderator"] as const; export type Role = ElementOf; type RoleSelectProps = Omit & { diff --git a/website/src/components/SideMenu.tsx b/website/src/components/SideMenu.tsx index 7ae67612..6988d1fc 100644 --- a/website/src/components/SideMenu.tsx +++ b/website/src/components/SideMenu.tsx @@ -17,7 +17,7 @@ export interface SideMenuProps { export function SideMenu(props: SideMenuProps) { const router = useRouter(); - const { t } = useTranslation(["side_menu", "common"]); + const { t } = useTranslation(); return (
diff --git a/website/src/components/Survey/TrackedTextarea.tsx b/website/src/components/Survey/TrackedTextarea.tsx index bc86a864..1b06d6e8 100644 --- a/website/src/components/Survey/TrackedTextarea.tsx +++ b/website/src/components/Survey/TrackedTextarea.tsx @@ -5,6 +5,7 @@ import { useTranslation } from "next-i18next"; import React from "react"; import { useCookies } from "react-cookie"; import { LanguageAbbreviations } from "src/lib/iso6393"; +import { getLocaleDisplayName } from "src/lib/languages"; import { colors } from "src/styles/Theme/colors"; interface TrackedTextboxProps { @@ -80,8 +81,8 @@ export const TrackedTextarea = (props: TrackedTextboxProps) => { > {detectedLang} diff --git a/website/src/components/Tasks/CreateTask.tsx b/website/src/components/Tasks/CreateTask.tsx index 448ad287..ee9b71c1 100644 --- a/website/src/components/Tasks/CreateTask.tsx +++ b/website/src/components/Tasks/CreateTask.tsx @@ -1,7 +1,7 @@ import { Box, Stack, Text, useColorModeValue } from "@chakra-ui/react"; import { useTranslation } from "next-i18next"; import { useState } from "react"; -import { MessageTable } from "src/components/Messages/MessageTable"; +import { MessageConversation } from "src/components/Messages/MessageConversation"; import { TrackedTextarea } from "src/components/Survey/TrackedTextarea"; import { TwoColumnsWithCards } from "src/components/Survey/TwoColumnsWithCards"; import { TaskSurveyProps } from "src/components/Tasks/Task"; @@ -44,7 +44,7 @@ export const CreateTask = ({ {task.type !== TaskType.initial_prompt && ( - + )} diff --git a/website/src/components/Tasks/EvaluateTask.tsx b/website/src/components/Tasks/EvaluateTask.tsx index 2f24e6f1..ee0ba881 100644 --- a/website/src/components/Tasks/EvaluateTask.tsx +++ b/website/src/components/Tasks/EvaluateTask.tsx @@ -1,6 +1,6 @@ import { Box, useColorModeValue } from "@chakra-ui/react"; import { useEffect, useState } from "react"; -import { MessageTable } from "src/components/Messages/MessageTable"; 
+import { MessageConversation } from "src/components/Messages/MessageConversation"; import { Sortable } from "src/components/Sortable/Sortable"; import { SurveyCard } from "src/components/Survey/SurveyCard"; import { TaskSurveyProps } from "src/components/Tasks/Task"; @@ -47,7 +47,7 @@ export const EvaluateTask = ({ - + - + { - const url = path || "/api/messages/user"; - const { data: messages, isLoading } = useSWR(url, get, { - refreshInterval: 2000, - }); - // TODO(#651): This box coloring and styling is used in multiple places. We - // should factor it into a common ui component. - const boxBgColor = useColorModeValue("white", "gray.700"); - const boxAccentColor = useColorModeValue("gray.200", "gray.900"); - - return ( - - {isLoading ? : } - - ); -}; - -export { UserMessagesCell }; diff --git a/website/src/components/UserMessagesCell/index.tsx b/website/src/components/UserMessagesCell/index.tsx deleted file mode 100644 index c32b1c1f..00000000 --- a/website/src/components/UserMessagesCell/index.tsx +++ /dev/null @@ -1 +0,0 @@ -export * from "./UserMessagesCell"; diff --git a/website/src/components/UserTable.tsx b/website/src/components/UserTable.tsx index ab05d065..385c9d06 100644 --- a/website/src/components/UserTable.tsx +++ b/website/src/components/UserTable.tsx @@ -7,19 +7,8 @@ import { get } from "src/lib/api"; import type { FetchUsersResponse, User } from "src/types/Users"; import useSWR from "swr"; -import { DataTable, DataTableColumnDef, FilterItem } from "./DataTable"; - -interface Pagination { - /** - * The user's `display_name` used for pagination. - */ - cursor: string; - - /** - * The pagination direction. 
- */ - direction: "forward" | "back"; -} +import { DataTable, DataTableColumnDef, FilterItem } from "./DataTable/DataTable"; +import { useCursorPagination } from "./DataTable/useCursorPagination"; const columnHelper = createColumnHelper(); @@ -56,12 +45,13 @@ const columns: DataTableColumnDef[] = [ ]; export const UserTable = memo(function UserTable() { - const [pagination, setPagination] = useState({ cursor: "", direction: "forward" }); + const { pagination, resetCursor, toNextPage, toPreviousPage } = useCursorPagination(); const [filterValues, setFilterValues] = useState([]); const handleFilterValuesChange = (values: FilterItem[]) => { setFilterValues(values); - setPagination((old) => ({ ...old, cursor: "" })); + resetCursor(); }; + // Fetch and save the users. // This follows useSWR's recommendation for simple pagination: // https://swr.vercel.app/docs/pagination#when-to-use-useswr @@ -74,20 +64,6 @@ export const UserTable = memo(function UserTable() { } ); - const toPreviousPage = () => { - setPagination({ - cursor: data?.prev || "", - direction: "back", - }); - }; - - const toNextPage = () => { - setPagination({ - cursor: data?.next || "", - direction: "forward", - }); - }; - return ( @@ -95,8 +71,8 @@ export const UserTable = memo(function UserTable() { data={data?.items || []} columns={columns} caption="Users" - onNextClick={toNextPage} - onPreviousClick={toPreviousPage} + onNextClick={() => toNextPage(data)} + onPreviousClick={() => toPreviousPage(data)} disableNext={!data?.next} disablePrevious={!data?.prev} filterValues={filterValues} diff --git a/website/src/hooks/auth/useHasAnyRole.ts b/website/src/hooks/auth/useHasAnyRole.ts new file mode 100644 index 00000000..1fb3b472 --- /dev/null +++ b/website/src/hooks/auth/useHasAnyRole.ts @@ -0,0 +1,8 @@ +import { useSession } from "next-auth/react"; +import { Role } from "src/components/RoleSelect"; + +export const useHasAnyRole = (roles: Role[]) => { + const { data: session } = useSession(); + + return 
roles.some((role) => role === session?.user?.role); +}; diff --git a/website/src/lib/auth.ts b/website/src/lib/auth.ts index ee004ba9..1b9b0887 100644 --- a/website/src/lib/auth.ts +++ b/website/src/lib/auth.ts @@ -32,4 +32,18 @@ const withRole = (role: Role, handler: (arg0: NextApiRequest, arg1: NextApiRespo }; }; +export const withAnyRole = ( + roles: Role[], + handler: (arg0: NextApiRequest, arg1: NextApiResponse, token: JWT) => void +) => { + return async (req: NextApiRequest, res: NextApiResponse) => { + const token = await getToken({ req }); + if (!token || roles.every((role) => token.role !== role)) { + res.status(403).end(); + return; + } + return handler(req, res, token); + }; +}; + export { withoutRole, withRole }; diff --git a/website/src/lib/languages.ts b/website/src/lib/languages.ts new file mode 100644 index 00000000..1e6e1768 --- /dev/null +++ b/website/src/lib/languages.ts @@ -0,0 +1,13 @@ +/** + * Returns the locale's name. + */ +export const getLocaleDisplayName = (locale, displayLocale = undefined) => { + // Different browsers seem to handle "eu" differently from the Node server. + // Special case this to avoid a hydration failure. + if (locale === "eu") { + return "Euskara"; + } + const displayName = new Intl.DisplayNames([displayLocale || locale], { type: "language" }).of(locale); + // Return the Titlecased version of the language name. 
+ return displayName.charAt(0).toLocaleUpperCase() + displayName.slice(1); +}; diff --git a/website/src/lib/oasst_api_client.ts b/website/src/lib/oasst_api_client.ts index 826fc195..a3607323 100644 --- a/website/src/lib/oasst_api_client.ts +++ b/website/src/lib/oasst_api_client.ts @@ -1,4 +1,4 @@ -import type { EmojiOp, Message } from "src/types/Conversation"; +import type { EmojiOp, FetchUserMessagesCursorResponse, Message } from "src/types/Conversation"; import { LeaderboardReply, LeaderboardTimeFrame } from "src/types/Leaderboard"; import type { AvailableTasks } from "src/types/Task"; import { FetchTrollBoardResponse, TrollboardTimeFrame } from "src/types/Trollboard"; @@ -264,11 +264,33 @@ export class OasstApiClient { return this.get(`/api/v1/users/${user_id}/messages`); } + async fetch_user_messages_cursor( + user_id: string, + { + direction, + cursor, + ...rest + }: { include_deleted?: boolean; max_count?: number; cursor?: string; direction: "forward" | "back"; desc?: boolean } + ) { + return this.get(`/api/v1/users/${user_id}/messages/cursor`, { + ...rest, + after: direction === "forward" ? cursor : undefined, + before: direction === "back" ? cursor : undefined, + }); + } + /** * Updates the backend's knowledge about the `user_id`. 
+ */ - async set_user_status(user_id: string, is_enabled: boolean, notes: string): Promise { - await this.put(`/api/v1/users/${user_id}?enabled=${is_enabled}&notes=${notes}`); + async set_user_status( + user_id: string, + is_enabled: boolean, + notes: string, + show_on_leaderboard: boolean + ): Promise { + await this.put( + `/api/v1/users/${user_id}?enabled=${is_enabled}&notes=${notes}&show_on_leaderboard=${show_on_leaderboard}` + ); } /** @@ -314,8 +336,8 @@ export class OasstApiClient { return this.get(`/api/v1/messages?${params}`); } - fetch_recent_messages() { - return this.get(`/api/v1/messages`); + fetch_recent_messages(lang: string) { + return this.get(`/api/v1/messages`, { lang }); } fetch_message_children(messageId: string) { diff --git a/website/src/lib/routes.ts b/website/src/lib/routes.ts new file mode 100644 index 00000000..f85b5a31 --- /dev/null +++ b/website/src/lib/routes.ts @@ -0,0 +1,3 @@ +export const ROUTES = { + ADMIN_MESSAGE_DETAIL: (id: string) => `/admin/messages/${id}`, +}; diff --git a/website/src/pages/admin/manage_user/[id].tsx b/website/src/pages/admin/manage_user/[id].tsx index a68bca16..ee7bcc66 100644 --- a/website/src/pages/admin/manage_user/[id].tsx +++ b/website/src/pages/admin/manage_user/[id].tsx @@ -1,19 +1,39 @@ -import { Button, Card, CardBody, Container, FormControl, FormLabel, Input, Stack, useToast } from "@chakra-ui/react"; -import { InferGetServerSidePropsType } from "next"; +import { + Accordion, + AccordionButton, + AccordionIcon, + AccordionItem, + AccordionPanel, + Box, + Button, + Card, + CardBody, + CardHeader, + Checkbox, + CircularProgress, + FormControl, + FormLabel, + Input, + Stack, + useToast, +} from "@chakra-ui/react"; +import { GetServerSideProps, InferGetServerSidePropsType } from "next"; import Head from "next/head"; -import { useRouter } from "next/router"; -import { useSession } from "next-auth/react"; import { serverSideTranslations } from "next-i18next/serverSideTranslations"; -import { useEffect } from 
"react"; import { useForm } from "react-hook-form"; +import { AdminArea } from "src/components/AdminArea"; +import { useCursorPagination } from "src/components/DataTable/useCursorPagination"; +import { JsonCard } from "src/components/JsonCard"; import { getAdminLayout } from "src/components/Layout"; +import { AdminMessageTable } from "src/components/Messages/AdminMessageTable"; import { Role, RoleSelect } from "src/components/RoleSelect"; -import { UserMessagesCell } from "src/components/UserMessagesCell"; -import { post } from "src/lib/api"; +import { get, post } from "src/lib/api"; import { userlessApiClient } from "src/lib/oasst_client_factory"; import prisma from "src/lib/prismadb"; +import { FetchUserMessagesCursorResponse } from "src/types/Conversation"; +import { User } from "src/types/Users"; +import useSWRImmutable from "swr/immutable"; import useSWRMutation from "swr/mutation"; - interface UserForm { user_id: string; id: string; @@ -21,33 +41,17 @@ interface UserForm { display_name: string; role: Role; notes: string; + show_on_leaderboard: boolean; } const ManageUser = ({ user }: InferGetServerSidePropsType) => { const toast = useToast(); - const router = useRouter(); - const { data: session, status } = useSession(); - - // Check when the user session is loaded and re-route if the user is not an - // admin. This follows the suggestion by NextJS for handling private pages: - // https://nextjs.org/docs/api-reference/next/router#usage - // - // All admin pages should use the same check and routing steps. - useEffect(() => { - if (status === "loading") { - return; - } - if (session?.user?.role === "admin") { - return; - } - router.push("/"); - }, [router, session, status]); // Trigger to let us update the user's role. Triggers a toast when complete. 
const { trigger } = useSWRMutation("/api/admin/update_user", post, { onSuccess: () => { toast({ - title: "User Role Updated", + title: "Updated user", status: "success", duration: 1000, isClosable: true, @@ -76,8 +80,8 @@ const ManageUser = ({ user }: InferGetServerSidePropsType - - + +
trigger(data))}> @@ -88,32 +92,97 @@ const ManageUser = ({ user }: InferGetServerSidePropsTypeDisplay Name - + Role - + Notes + + Show on leaderboard + + + + + + + Raw JSON + + + + + {user} + + +
-
- -
+ + + {`User's messages`} + + + + + + + ); }; +const UserMessageTable = ({ id }: { id: User["id"] }) => { + const { pagination, toNextPage, toPreviousPage } = useCursorPagination(); + const { data, error, isLoading } = useSWRImmutable( + `/api/admin/user_messages?user=${id}&cursor=${encodeURIComponent(pagination.cursor)}&direction=${ + pagination.direction + }`, + get, + { + keepPreviousData: true, + } + ); + + if (isLoading && !data) { + return ; + } + + if (error) { + return <>Unable to load messages.; + } + + return ( + toNextPage(data)} + onPreviousClick={() => toPreviousPage(data)} + > + ); +}; + /** * Fetch the user's data on the server side when rendering. */ -export async function getServerSideProps({ query, locale }) { - const backend_user = await userlessApiClient.fetch_user(query.id); +export const getServerSideProps: GetServerSideProps<{ user: User }, { id: string }> = async ({ + params, + locale = "en", +}) => { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + const backend_user = await userlessApiClient.fetch_user(params!.id as string); + + if (!backend_user) { + return { + notFound: true, + }; + } const local_user = await prisma.user.findUnique({ where: { id: backend_user.id }, select: { @@ -130,7 +199,7 @@ export async function getServerSideProps({ query, locale }) { ...(await serverSideTranslations(locale, ["common"])), }, }; -} +}; ManageUser.getLayout = getAdminLayout; diff --git a/website/src/pages/admin/trollboard.tsx b/website/src/pages/admin/trollboard.tsx index aaef80f3..47e69d6b 100644 --- a/website/src/pages/admin/trollboard.tsx +++ b/website/src/pages/admin/trollboard.tsx @@ -18,7 +18,7 @@ const Leaderboard = () => { - {t("leaderboard")} + Trollboard diff --git a/website/src/pages/api/admin/delete_message/[id].ts b/website/src/pages/api/admin/delete_message/[id].ts index 3dc607ed..03c070d6 100644 --- a/website/src/pages/api/admin/delete_message/[id].ts +++ b/website/src/pages/api/admin/delete_message/[id].ts @@ 
-1,7 +1,7 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; -const handler = withRole("admin", async (req, res, token) => { +const handler = withAnyRole(["admin", "moderator"], async (req, res, token) => { const { id } = req.query; try { const client = await createApiClient(token); diff --git a/website/src/pages/api/admin/parameters.ts b/website/src/pages/api/admin/parameters.ts index 73e04329..3c91488b 100644 --- a/website/src/pages/api/admin/parameters.ts +++ b/website/src/pages/api/admin/parameters.ts @@ -1,9 +1,15 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; -export default withRole("admin", async (_, res, token) => { +export default withAnyRole(["admin", "moderator"], async (_, res, token) => { const client = await createApiClient(token); + if (token.role === "moderator") { + const publicSettings = await client.fetch_public_settings(); + + return res.json(publicSettings); + } + try { const fullSettings = await client.fetch_full_settings(); diff --git a/website/src/pages/api/admin/status.ts b/website/src/pages/api/admin/status.ts index 956ad2cb..75dbb878 100644 --- a/website/src/pages/api/admin/status.ts +++ b/website/src/pages/api/admin/status.ts @@ -1,10 +1,10 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClientFromUser } from "src/lib/oasst_client_factory"; /** * Returns tasks availability, stats, and tree manager stats. */ -const handler = withRole("admin", async (req, res) => { +const handler = withAnyRole(["admin", "moderator"], async (req, res) => { // NOTE: why are we using a dummy user here? 
const dummyUser = { id: "__dummy_user__", diff --git a/website/src/pages/api/admin/stop_tree/[id].ts b/website/src/pages/api/admin/stop_tree/[id].ts index ade751b3..26dfd20b 100644 --- a/website/src/pages/api/admin/stop_tree/[id].ts +++ b/website/src/pages/api/admin/stop_tree/[id].ts @@ -1,7 +1,7 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; -const handler = withRole("admin", async (req, res, token) => { +const handler = withAnyRole(["admin", "moderator"], async (req, res, token) => { const { id } = req.query; try { const client = await createApiClient(token); diff --git a/website/src/pages/api/admin/trollboard.ts b/website/src/pages/api/admin/trollboard.ts index 9016f42b..be8e002d 100644 --- a/website/src/pages/api/admin/trollboard.ts +++ b/website/src/pages/api/admin/trollboard.ts @@ -1,8 +1,8 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; import { TrollboardTimeFrame } from "src/types/Trollboard"; -export default withRole("admin", async (req, res, token) => { +export default withAnyRole(["admin", "moderator"], async (req, res, token) => { const client = await createApiClient(token); const trollboard = await client.fetch_trollboard(req.query.time_frame as TrollboardTimeFrame, { diff --git a/website/src/pages/api/admin/update_user.ts b/website/src/pages/api/admin/update_user.ts index c71159ad..13ee97f5 100644 --- a/website/src/pages/api/admin/update_user.ts +++ b/website/src/pages/api/admin/update_user.ts @@ -6,7 +6,7 @@ import prisma from "src/lib/prismadb"; * Update's the user's data in the database. Accessible only to admins. 
*/ const handler = withRole("admin", async (req, res, token) => { - const { id, auth_method, user_id, notes, role } = req.body; + const { id, auth_method, user_id, notes, role, show_on_leaderboard } = req.body; const oasstApiClient = await createApiClient(token); // If the user is authorized by the web, update their role. @@ -17,7 +17,7 @@ const handler = withRole("admin", async (req, res, token) => { }); } // Tell the backend the user's enabled or not enabled status. - await oasstApiClient.set_user_status(user_id, role !== "banned", notes); + await oasstApiClient.set_user_status(user_id, role !== "banned", notes, show_on_leaderboard); res.status(200).json({}); }); diff --git a/website/src/pages/api/admin/user_messages.ts b/website/src/pages/api/admin/user_messages.ts index 0223e8e3..57e9ba15 100644 --- a/website/src/pages/api/admin/user_messages.ts +++ b/website/src/pages/api/admin/user_messages.ts @@ -1,15 +1,25 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; -import type { Message } from "src/types/Conversation"; + +const LIMIT = 10; /** * Returns the messages recorded by the backend for a user. 
*/ -const handler = withRole("admin", async (req, res, token) => { - const { user } = req.query; +const handler = withAnyRole(["admin", "moderator"], async (req, res, token) => { + const { cursor, direction, user } = req.query; + const oasstApiClient = await createApiClient(token); - const messages: Message[] = await oasstApiClient.fetch_user_messages(user as string); - res.status(200).json(messages); + + const response = await oasstApiClient.fetch_user_messages_cursor(user as string, { + include_deleted: true, + direction: direction as "back", + cursor: cursor as string, + max_count: LIMIT, + desc: true, + }); + + res.status(200).json(response); }); export default handler; diff --git a/website/src/pages/api/admin/users.ts b/website/src/pages/api/admin/users.ts index eae0f072..4c048412 100644 --- a/website/src/pages/api/admin/users.ts +++ b/website/src/pages/api/admin/users.ts @@ -1,4 +1,4 @@ -import { withRole } from "src/lib/auth"; +import { withAnyRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; import prisma from "src/lib/prismadb"; import { FetchUsersParams } from "src/types/Users"; @@ -17,7 +17,7 @@ const PAGE_SIZE = 20; * - `direction`: Either "forward" or "backward" representing the pagination * direction. 
*/ -const handler = withRole("admin", async (req, res, token) => { +const handler = withAnyRole(["admin", "moderator"], async (req, res, token) => { const { cursor, direction, searchDisplayName = "", sortKey = "username" } = req.query; const oasstApiClient = await createApiClient(token); diff --git a/website/src/pages/api/auth/[...nextauth].ts b/website/src/pages/api/auth/[...nextauth].ts index 993011d3..f4266f6a 100644 --- a/website/src/pages/api/auth/[...nextauth].ts +++ b/website/src/pages/api/auth/[...nextauth].ts @@ -1,5 +1,6 @@ import { PrismaAdapter } from "@next-auth/prisma-adapter"; import { boolean } from "boolean"; +import { generateUsername } from "friendly-username-generator"; import { NextApiRequest, NextApiResponse } from "next"; import type { AuthOptions } from "next-auth"; import NextAuth from "next-auth"; @@ -11,7 +12,6 @@ import { checkCaptcha } from "src/lib/captcha"; import { createApiClientFromUser } from "src/lib/oasst_client_factory"; import prisma from "src/lib/prismadb"; import { BackendUserCore } from "src/types/Users"; -import { generateUsername } from "friendly-username-generator"; const providers: Provider[] = []; @@ -78,6 +78,14 @@ const adminUserMap = process.env.ADMIN_USERS.split(",").reduce((result, entry) = return result; }, new Map()); +const moderatorUserMap = process.env.MODERATOR_USERS.split(",").reduce((result, entry) => { + const [authType, id] = entry.split(":"); + const s = result.get(authType) || new Set(); + s.add(id); + result.set(authType, s); + return result; +}, new Map()); + const authOptions: AuthOptions = { // Ensure we can store user data in a database. adapter: PrismaAdapter(prisma), @@ -161,9 +169,10 @@ const authOptions: AuthOptions = { // Get the admin list for the user's auth type. const adminForAccountType = adminUserMap.get(account.provider); + const moderatorForAccountType = moderatorUserMap.get(account.provider); // Return early if there's no admin list. 
- if (!adminForAccountType) { + if (!adminForAccountType && !moderatorForAccountType) { return; } @@ -180,6 +189,17 @@ const authOptions: AuthOptions = { }, }); } + + if (moderatorForAccountType.has(account.providerAccountId)) { + await prisma.user.update({ + data: { + role: "moderator", + }, + where: { + id: user.id, + }, + }); + } }, }, }; diff --git a/website/src/pages/api/messages/index.ts b/website/src/pages/api/messages/index.ts index fbcaee3c..978ed3ff 100644 --- a/website/src/pages/api/messages/index.ts +++ b/website/src/pages/api/messages/index.ts @@ -1,9 +1,11 @@ import { withoutRole } from "src/lib/auth"; import { createApiClient } from "src/lib/oasst_client_factory"; +import { getUserLanguage } from "src/lib/users"; const handler = withoutRole("banned", async (req, res, token) => { const client = await createApiClient(token); - const messages = await client.fetch_recent_messages(); + const userLanguage = getUserLanguage(req); + const messages = await client.fetch_recent_messages(userLanguage); res.status(200).json(messages); }); diff --git a/website/src/pages/messages/index.tsx b/website/src/pages/messages/index.tsx index 8d950a2b..92784bb1 100644 --- a/website/src/pages/messages/index.tsx +++ b/website/src/pages/messages/index.tsx @@ -1,11 +1,13 @@ import { Box, CircularProgress, SimpleGrid, Text, useColorModeValue } from "@chakra-ui/react"; import Head from "next/head"; import { useTranslation } from "next-i18next"; +import { useCookies } from "react-cookie"; import { getDashboardLayout } from "src/components/Layout"; -import { MessageTable } from "src/components/Messages/MessageTable"; +import { MessageConversation } from "src/components/Messages/MessageConversation"; import { get } from "src/lib/api"; import useSWRImmutable from "swr/immutable"; export { getDefaultStaticProps as getStaticProps } from "src/lib/default_static_props"; +import { getLocaleDisplayName } from "src/lib/languages"; const MessagesDashboard = () => { const { t } = 
useTranslation(["message"]); @@ -15,6 +17,8 @@ const MessagesDashboard = () => { const { data: messages } = useSWRImmutable("/api/messages", get, { revalidateOnMount: true }); const { data: userMessages } = useSWRImmutable(`/api/messages/user`, get, { revalidateOnMount: true }); + const [cookies] = useCookies(["NEXT_LOCALE"]); + const currentLanguage = cookies["NEXT_LOCALE"] || "en"; return ( <> @@ -24,7 +28,9 @@ const MessagesDashboard = () => { - {t("recent_messages")} + {t("recent_messages", { + language: getLocaleDisplayName(currentLanguage), + })} { borderRadius="xl" className="p-3 sm:p-4 shadow-sm" > - {messages ? : } + {messages ? : } @@ -47,7 +53,11 @@ const MessagesDashboard = () => { borderRadius="xl" className="p-6 shadow-sm" > - {userMessages ? : } + {userMessages ? ( + + ) : ( + + )} diff --git a/website/src/styles/Theme/components/Table.ts b/website/src/styles/Theme/components/Table.ts new file mode 100644 index 00000000..ebf9f9df --- /dev/null +++ b/website/src/styles/Theme/components/Table.ts @@ -0,0 +1,20 @@ +import { tableAnatomy } from "@chakra-ui/anatomy"; +import { createMultiStyleConfigHelpers } from "@chakra-ui/react"; + +const { definePartsStyle, defineMultiStyleConfig } = createMultiStyleConfigHelpers(tableAnatomy.keys); + +export const tableTheme = defineMultiStyleConfig({ + variants: { + simple: definePartsStyle(({ colorMode }) => { + const isLightMode = colorMode === "light"; + return { + td: { + borderColor: isLightMode ? "gray.100" : "gray.800", + }, + th: { + borderColor: isLightMode ? 
"gray.100" : "gray.800", + }, + }; + }), + }, +}); diff --git a/website/src/styles/Theme/index.ts b/website/src/styles/Theme/index.ts index 91d54088..2fde5a8c 100644 --- a/website/src/styles/Theme/index.ts +++ b/website/src/styles/Theme/index.ts @@ -5,6 +5,7 @@ import { colors } from "./colors"; import { badgeTheme } from "./components/Badge"; import { cardTheme } from "./components/Card"; import { containerTheme } from "./components/Container"; +import { tableTheme } from "./components/Table"; const config: ThemeConfig = { initialColorMode: "light", @@ -16,6 +17,7 @@ const components = { Badge: badgeTheme, Container: containerTheme, Card: cardTheme, + Table: tableTheme, }; const breakpoints = { diff --git a/website/src/types/Conversation.ts b/website/src/types/Conversation.ts index 5eb86351..57a9efbb 100644 --- a/website/src/types/Conversation.ts +++ b/website/src/types/Conversation.ts @@ -19,8 +19,24 @@ export interface Message extends MessageEmojis { parent_id: string; frontend_message_id?: string; user_id: string; + user_is_author: boolean | null; + deleted: boolean | null; + synthetic: boolean | null; + message_tree_id: string; + ranking_count: number | null; + rank: number | null; + model_name: string | null; + review_count: number | null; } export interface Conversation { messages: Message[]; } + +export type FetchUserMessagesCursorResponse = { + next?: string; + prev?: string; + sort_key: string; + items: Message[]; + order: "asc" | "desc"; +}; diff --git a/website/src/types/Users.ts b/website/src/types/Users.ts index 5100dec4..6dd65e28 100644 --- a/website/src/types/Users.ts +++ b/website/src/types/Users.ts @@ -75,11 +75,11 @@ export interface BackendUser extends BackendUserCore { /** * An expanded User for the web. */ -export interface User extends BackendUser { +export interface User extends BackendUser { /** * The user's roles within the webapp. 
*/ - role: string; + role: TRole; } export type FetchUsersParams = { diff --git a/website/src/types/utils.ts b/website/src/types/utils.ts index 82c35036..f3ac0af3 100644 --- a/website/src/types/utils.ts +++ b/website/src/types/utils.ts @@ -1,3 +1,10 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ // https://github.com/ts-essentials/ts-essentials/blob/25cae45c162f8784e3cdae8f43783d0c66370a57/lib/types.ts#L437 // eslint-disable-next-line @typescript-eslint/no-explicit-any export type ElementOf = T extends readonly (infer ET)[] ? ET : never; +type AnyRecord = Record; +type KeyofBase = keyof any; + +export type AnyArray = Array | ReadonlyArray; + +export type StrictOmit = T extends AnyArray ? never : Omit; diff --git a/website/types/env.d.ts b/website/types/env.d.ts index 39817489..13fd0a36 100644 --- a/website/types/env.d.ts +++ b/website/types/env.d.ts @@ -6,6 +6,8 @@ declare global { CLOUDFLARE_CAPTCHA_SECRET_KEY: string; NEXT_PUBLIC_ENABLE_EMAIL_SIGNIN_CAPTCHA: boolean; NEXT_PUBLIC_ENABLE_EMAIL_SIGNIN: boolean; + ADMIN_USERS: string; + MODERATOR_USERS: string; } } } diff --git a/website/types/i18next.d.ts b/website/types/i18next.d.ts index 05111279..8b7a0e24 100644 --- a/website/types/i18next.d.ts +++ b/website/types/i18next.d.ts @@ -4,7 +4,6 @@ import type index from "public/locales/en/index.json"; import type labelling from "public/locales/en/labelling.json"; import type leaderboard from "public/locales/en/leaderboard.json"; import type message from "public/locales/en/message.json"; -import type side_menu from "public/locales/en/side_menu.json"; import type tasks from "public/locales/en/tasks.json"; import type tos from "public/locales/en/tos.json"; @@ -18,7 +17,6 @@ declare module "i18next" { tasks: typeof tasks; message: typeof message; labelling: typeof labelling; - side_menu: typeof side_menu; tos: typeof tos; }; }