From 1facc25b9b46d3eb533769708bc7debce6f5c9f2 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Mon, 16 Jan 2023 23:25:16 +0100 Subject: [PATCH 01/62] updated prod playbook --- ansible/deploy-to-node.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index 94746437..a68db329 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -88,8 +88,9 @@ community.docker.docker_container: name: "oasst-{{ stack_name }}-pgbackrest" image: woblerr/pgbackrest:2.43 - state: "{{ 'stopped' if stack_name == 'production' else 'absent' }}" + state: "{{ 'started' if stack_name == 'production' else 'absent' }}" network_mode: "oasst-{{ stack_name }}" + command: "/bin/bash" volumes: - "./{{ stack_name }}/pgbackrest.conf:/etc/pgbackrest/pgbackrest.conf" - "oasst-{{ stack_name }}-postgres-backend:/var/lib/postgresql/data" From b0ce816e15b79822ab0319d918368fe612df4f3b Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Mon, 16 Jan 2023 23:28:30 +0100 Subject: [PATCH 02/62] updated prod playbook --- ansible/deploy-to-node.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index a68db329..86a0dbeb 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -90,7 +90,7 @@ image: woblerr/pgbackrest:2.43 state: "{{ 'started' if stack_name == 'production' else 'absent' }}" network_mode: "oasst-{{ stack_name }}" - command: "/bin/bash" + command: [sleep, infinity] volumes: - "./{{ stack_name }}/pgbackrest.conf:/etc/pgbackrest/pgbackrest.conf" - "oasst-{{ stack_name }}-postgres-backend:/var/lib/postgresql/data" From 4eb97daee08d6c9bf1f83a1b500545dc269dea5c Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Mon, 16 Jan 2023 23:32:09 +0100 Subject: [PATCH 03/62] updated prod playbook --- ansible/deploy-to-node.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index 86a0dbeb..4bb13bb6 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -90,7 +90,7 @@ image: woblerr/pgbackrest:2.43 state: "{{ 'started' if stack_name == 'production' else 'absent' }}" network_mode: "oasst-{{ stack_name }}" - command: [sleep, infinity] + entrypoint: ["/bin/bash", "-c", "sleep infinity"] volumes: - "./{{ stack_name }}/pgbackrest.conf:/etc/pgbackrest/pgbackrest.conf" - "oasst-{{ stack_name }}-postgres-backend:/var/lib/postgresql/data" From cd63df1cb22a0c5098b0839fd52b245ea1a19b17 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Mon, 16 Jan 2023 23:36:33 +0100 Subject: [PATCH 04/62] updated prod playbook --- ansible/pgbackrest.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/pgbackrest.conf b/ansible/pgbackrest.conf index 036826d3..147ff8c1 100644 --- a/ansible/pgbackrest.conf +++ b/ansible/pgbackrest.conf @@ -2,7 +2,7 @@ pg1-path=/var/lib/postgresql/data [global] -repo1-retention-full=3 # keep last 3 backups +repo1-retention-full=3 repo1-type=s3 repo1-path=/oasst-prod repo1-s3-region=us-east-1 From 1f4797055877a03e8a11bd19808c5e6168ccd581 Mon Sep 17 00:00:00 2001 From: rsandb Date: Mon, 16 Jan 2023 20:22:04 -0600 Subject: [PATCH 05/62] style 500 page --- website/src/components/EmptyState.tsx | 8 +++++++- website/src/pages/500.tsx | 25 ++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/website/src/components/EmptyState.tsx b/website/src/components/EmptyState.tsx index 8d82163c..825dc610 100644 --- a/website/src/components/EmptyState.tsx +++ b/website/src/components/EmptyState.tsx @@ -13,7 +13,7 @@ export const EmptyState = (props: EmptyStateProps) => { const router = useRouter(); return ( - + {props.text} @@ -32,3 +32,9 @@ export const TaskEmptyState = () => { export const PageEmptyState = () => { return ; }; + +export const ServerEmptyState = () => { + return ( + + ); +}; diff --git 
a/website/src/pages/500.tsx b/website/src/pages/500.tsx index d6e68074..ebbcee67 100644 --- a/website/src/pages/500.tsx +++ b/website/src/pages/500.tsx @@ -1,7 +1,8 @@ -import { Button, Link, Stack } from "@chakra-ui/react"; +import { Button, Box, Text, Center, Link, Stack } from "@chakra-ui/react"; import Head from "next/head"; import NextLink from "next/link"; import { FiAlertTriangle } from "react-icons/fi"; +import { ServerEmptyState } from "src/components/EmptyState"; export default function Error() { return ( @@ -10,23 +11,29 @@ export default function Error() { 500 - Open Assistant -
- -

Sorry, We encountered a server error. We're not sure what went wrong

-

Please file a but below and describe what you were trying to accomplish

- -
-
+
+ ); } From 54b5a4b0a39f72309794581de4fcc6c12d22e1b3 Mon Sep 17 00:00:00 2001 From: notmd Date: Tue, 17 Jan 2023 10:13:53 +0700 Subject: [PATCH 06/62] use `react-hook-form` everywhere --- website/src/pages/account/edit.tsx | 77 ++++++++++++++++++------------ website/src/pages/auth/signin.tsx | 44 ++++++++++------- 2 files changed, 74 insertions(+), 47 deletions(-) diff --git a/website/src/pages/account/edit.tsx b/website/src/pages/account/edit.tsx index 497e8238..fe8e8981 100644 --- a/website/src/pages/account/edit.tsx +++ b/website/src/pages/account/edit.tsx @@ -2,27 +2,11 @@ import { Button, Input, InputGroup } from "@chakra-ui/react"; import Head from "next/head"; import Router from "next/router"; import { useSession } from "next-auth/react"; -import React, { useState } from "react"; +import React from "react"; +import { Control, useForm, useWatch } from "react-hook-form"; export default function Account() { const { data: session } = useSession(); - const [username, setUsername] = useState(""); - - const updateUser = async (e: React.SyntheticEvent) => { - e.preventDefault(); - try { - const body = { username }; - await fetch("/api/username", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - }); - session.user.name = username; - await Router.push("/account"); - } catch (error) { - console.error(error); - } - }; if (!session) { return; @@ -39,21 +23,52 @@ export default function Account() {

{session.user.name || "No username"}

-
- - setUsername(e.target.value)} - placeholder="Edit Username" - type="text" - value={username} - > - - -
+
); } + +const EditForm = () => { + const { data: session } = useSession(); + + const updateUser = async ({ username }: { username: string }) => { + try { + const body = { username }; + await fetch("/api/username", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + session.user.name = username; + await Router.push("/account"); + } catch (error) { + console.error(error); + } + }; + + const { register, handleSubmit, control } = useForm<{ username: string }>({ + defaultValues: { + username: session?.user.name, + }, + }); + + return ( +
+ + + + +
+ ); +}; + +const SubmitButton = ({ control }: { control: Control<{ username: string }> }) => { + const username = useWatch({ control, name: "username" }); + return ( + + ); +}; diff --git a/website/src/pages/auth/signin.tsx b/website/src/pages/auth/signin.tsx index ab1c74ca..99488d81 100644 --- a/website/src/pages/auth/signin.tsx +++ b/website/src/pages/auth/signin.tsx @@ -6,11 +6,12 @@ import Link from "next/link"; import { useRouter } from "next/router"; import { ClientSafeProvider, getProviders, signIn } from "next-auth/react"; import React, { useEffect, useRef, useState } from "react"; +import { useForm } from "react-hook-form"; import { FaBug, FaDiscord, FaEnvelope, FaGithub } from "react-icons/fa"; import { AuthLayout } from "src/components/AuthLayout"; import { Footer } from "src/components/Footer"; import { Header } from "src/components/Header"; -import { RoleSelect } from "src/components/RoleSelect"; +import { Role, RoleSelect } from "src/components/RoleSelect"; export type SignInErrorTypes = | "Signin" @@ -60,15 +61,14 @@ function Signin({ providers }: SigninProps) { } }, [router]); - const signinWithEmail = (ev: React.FormEvent) => { - ev.preventDefault(); - signIn(email.id, { callbackUrl: "/dashboard", email: emailEl.current.value }); + const signinWithEmail = (data: { email: string }) => { + signIn(email.id, { callbackUrl: "/dashboard", email: data.email }); }; const { colorMode } = useColorMode(); const bgColorClass = colorMode === "light" ? "bg-gray-50" : "bg-chakra-gray-900"; const buttonBgColor = colorMode === "light" ? "#2563eb" : "#2563eb"; - + const { register, handleSubmit } = useForm<{ email: string }>(); return (
@@ -79,7 +79,7 @@ function Signin({ providers }: SigninProps) { {credentials && } {email && ( -
+ }> Continue with Email @@ -174,23 +174,35 @@ const SigninButton = (props: ButtonProps) => { ); }; +interface DebugSigninFormData { + username: string; + role: Role; +} + const DebugSigninForm = ({ credentials, bgColorClass }: { credentials: ClientSafeProvider; bgColorClass: string }) => { - const debugUsernameEl = useRef(null); - const roleRef = useRef(null); - function signinWithDebugCredentials(ev: React.FormEvent) { - ev.preventDefault(); + const { register, handleSubmit } = useForm({ + defaultValues: { + role: "general", + username: "dev", + }, + }); + + function signinWithDebugCredentials(data: DebugSigninFormData) { signIn(credentials.id, { callbackUrl: "/dashboard", - username: debugUsernameEl.current.value, - role: roleRef.current.value, + ...data, }); } + return ( - + For Debugging Only - - + + }>Continue with Debug User From e49654dfb7c43b41593ab70a4c3bbd6e68c73b59 Mon Sep 17 00:00:00 2001 From: AbdBarho Date: Tue, 17 Jan 2023 07:05:46 +0100 Subject: [PATCH 07/62] Scroll to top on submit --- website/src/components/Tasks/Task/Task.tsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/website/src/components/Tasks/Task/Task.tsx b/website/src/components/Tasks/Task/Task.tsx index 05410d4e..3d393575 100644 --- a/website/src/components/Tasks/Task/Task.tsx +++ b/website/src/components/Tasks/Task/Task.tsx @@ -27,6 +27,8 @@ export const Task = ({ frontendId, task, trigger, mutate }) => { const replyContent = useRef(null); const [showUnchangedWarning, setShowUnchangedWarning] = useState(false); + const rootEl = useRef(null); + const taskType = TaskTypes.find((taskType) => taskType.type === task.type && taskType.mode === task.mode); const { trigger: sendRejection } = useSWRMutation("/api/reject_task", post, { @@ -89,6 +91,7 @@ export const Task = ({ frontendId, task, trigger, mutate }) => { content: replyContent.current, }); setTaskStatus("SUBMITTED"); + scrollToTop(rootEl.current); break; } default: @@ -138,7 +141,7 @@ export const 
Task = ({ frontendId, task, trigger, mutate }) => { } return ( -
+
{taskTypeComponent()} {
); }; + +const scrollToTop = (element: HTMLElement) => { + while (element) { + element.scrollTop = 0; + element = element.parentElement; + } +}; From ac5a0e8157755380f979592f48e990b4b6d74857 Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Tue, 17 Jan 2023 15:11:33 +0900 Subject: [PATCH 08/62] Adding links to external documentation for tasks and in footers --- .../src/components/Dashboard/SlimFooter.tsx | 1 + website/src/components/Footer.tsx | 8 ++++- website/src/components/Tasks/CreateTask.tsx | 11 ++---- website/src/components/Tasks/EvaluateTask.tsx | 15 +++----- .../components/Tasks/LabelTask/LabelTask.tsx | 14 ++------ .../Tasks/TaskHeader/TaskHeader.tsx | 35 +++++++++++++++++++ .../src/components/Tasks/TaskHeader/index.tsx | 1 + website/src/components/Tasks/TaskTypes.tsx | 18 ++++++++++ 8 files changed, 71 insertions(+), 32 deletions(-) create mode 100644 website/src/components/Tasks/TaskHeader/TaskHeader.tsx create mode 100644 website/src/components/Tasks/TaskHeader/index.tsx diff --git a/website/src/components/Dashboard/SlimFooter.tsx b/website/src/components/Dashboard/SlimFooter.tsx index 5a7b093c..1c109eb4 100644 --- a/website/src/components/Dashboard/SlimFooter.tsx +++ b/website/src/components/Dashboard/SlimFooter.tsx @@ -20,6 +20,7 @@ export function SlimFooter() { + diff --git a/website/src/components/Footer.tsx b/website/src/components/Footer.tsx index b22653e9..b239708a 100644 --- a/website/src/components/Footer.tsx +++ b/website/src/components/Footer.tsx @@ -42,7 +42,7 @@ export function Footer() { diff --git a/website/src/components/Tasks/CreateTask.tsx b/website/src/components/Tasks/CreateTask.tsx index 79d081ce..6cbead52 100644 --- a/website/src/components/Tasks/CreateTask.tsx +++ b/website/src/components/Tasks/CreateTask.tsx @@ -4,6 +4,7 @@ import { MessageTable } from "src/components/Messages/MessageTable"; import { TrackedTextarea } from "src/components/Survey/TrackedTextarea"; import { TwoColumnsWithCards } from 
"src/components/Survey/TwoColumnsWithCards"; import { TaskSurveyProps } from "src/components/Tasks/Task"; +import { TaskHeader } from "src/components/Tasks/TaskHeader"; export const CreateTask = ({ task, @@ -14,7 +15,6 @@ export const CreateTask = ({ }: TaskSurveyProps<{ text: string }>) => { const cardColor = useColorModeValue("gray.50", "gray.800"); const titleColor = useColorModeValue("gray.800", "gray.300"); - const labelColor = useColorModeValue("gray.600", "gray.400"); const [inputText, setInputText] = useState(""); const textChangeHandler = (event: React.ChangeEvent) => { @@ -33,14 +33,7 @@ export const CreateTask = ({
<> - - - {taskType.label} - - - {taskType.overview} - - + {task.conversation ? ( diff --git a/website/src/components/Tasks/EvaluateTask.tsx b/website/src/components/Tasks/EvaluateTask.tsx index 55aa4b3c..6ec92a96 100644 --- a/website/src/components/Tasks/EvaluateTask.tsx +++ b/website/src/components/Tasks/EvaluateTask.tsx @@ -1,19 +1,19 @@ -import { Box, Stack, Text, useColorModeValue } from "@chakra-ui/react"; +import { Box, useColorModeValue } from "@chakra-ui/react"; import { useEffect } from "react"; import { MessageTable } from "src/components/Messages/MessageTable"; import { Sortable } from "src/components/Sortable/Sortable"; import { SurveyCard } from "src/components/Survey/SurveyCard"; import { TaskSurveyProps } from "src/components/Tasks/Task"; +import { TaskHeader } from "src/components/Tasks/TaskHeader"; export const EvaluateTask = ({ task, + taskType, isEditable, isDisabled, onReplyChanged, }: TaskSurveyProps<{ ranking: number[] }>) => { const cardColor = useColorModeValue("gray.50", "gray.800"); - const titleColor = useColorModeValue("gray.800", "gray.300"); - const labelColor = useColorModeValue("gray.600", "gray.400"); let messages = []; if (task.conversation) { @@ -36,14 +36,7 @@ export const EvaluateTask = ({
- - - Instructions - - - Given the following {sortables}, sort them from best to worst, best being first, worst being last. - - + diff --git a/website/src/components/Tasks/LabelTask/LabelTask.tsx b/website/src/components/Tasks/LabelTask/LabelTask.tsx index 7299732e..7d6394df 100644 --- a/website/src/components/Tasks/LabelTask/LabelTask.tsx +++ b/website/src/components/Tasks/LabelTask/LabelTask.tsx @@ -1,5 +1,4 @@ -import { Box } from "@chakra-ui/react"; -import { Text, useColorModeValue } from "@chakra-ui/react"; +import { Box, useColorModeValue } from "@chakra-ui/react"; import { useEffect, useState } from "react"; import { MessageView } from "src/components/Messages"; import { MessageTable } from "src/components/Messages/MessageTable"; @@ -7,6 +6,7 @@ import { LabelRadioGroup } from "src/components/Survey/LabelRadioGroup"; import { LabelSliderGroup } from "src/components/Survey/LabelSliderGroup"; import { TwoColumnsWithCards } from "src/components/Survey/TwoColumnsWithCards"; import { TaskSurveyProps } from "src/components/Tasks/Task"; +import { TaskHeader } from "src/components/Tasks/TaskHeader"; import { TaskType } from "src/types/Task"; export const LabelTask = ({ @@ -36,20 +36,12 @@ export const LabelTask = ({ }; const cardColor = useColorModeValue("gray.50", "gray.800"); - const titleColor = useColorModeValue("gray.800", "gray.300"); - const labelColor = useColorModeValue("gray.600", "gray.400"); return (
<> - - {taskType.label} - - - {taskType.overview} - - + {task.conversation ? ( { + const labelColor = useColorModeValue("gray.600", "gray.400"); + const titleColor = useColorModeValue("gray.800", "gray.300"); + return ( + + + + {taskType.label} + + + } /> + + + + {taskType.overview} + + + ); +}; + +export { TaskHeader }; diff --git a/website/src/components/Tasks/TaskHeader/index.tsx b/website/src/components/Tasks/TaskHeader/index.tsx new file mode 100644 index 00000000..d328d709 --- /dev/null +++ b/website/src/components/Tasks/TaskHeader/index.tsx @@ -0,0 +1 @@ +export * from "./TaskHeader"; diff --git a/website/src/components/Tasks/TaskTypes.tsx b/website/src/components/Tasks/TaskTypes.tsx index 4a85ccbd..868a9fb8 100644 --- a/website/src/components/Tasks/TaskTypes.tsx +++ b/website/src/components/Tasks/TaskTypes.tsx @@ -11,6 +11,7 @@ export interface TaskInfo { category: TaskCategory; pathname: string; type: string; + help_link: string; mode?: string; overview?: string; instruction?: string; @@ -26,6 +27,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Help us improve Open Assistant by starting a random task.", category: TaskCategory.Tasks, pathname: "/tasks/random", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", type: "random", update_type: "random", }, @@ -35,6 +37,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Write initial prompts to help Open Assistant to try replying to diverse messages.", category: TaskCategory.Create, pathname: "/create/initial_prompt", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", type: "initial_prompt", overview: "Create an initial message to send to the assistant", instruction: "Provide the initial prompt", @@ -45,6 +48,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Chat with Open Assistant and help improve it’s responses as you interact with it.", category: TaskCategory.Create, pathname: "/create/user_reply", + help_link: 
"https://projects.laion.ai/Open-Assistant/docs/tasks/reply_as_user", type: "prompter_reply", overview: "Given the following conversation, provide an adequate reply", instruction: "Provide the user's reply", @@ -55,6 +59,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Help Open Assistant improve its responses to conversations with other users.", category: TaskCategory.Create, pathname: "/create/assistant_reply", + help_link: "https://projects.laion.ai/Open-Assistant/docs/tasks/reply_as_assistant", type: "assistant_reply", overview: "Given the following conversation, provide an adequate reply", instruction: "Provide the assistant's reply", @@ -66,6 +71,8 @@ export const TaskTypes: TaskInfo[] = [ category: TaskCategory.Evaluate, desc: "Help Open Assistant improve its responses to conversations with other users.", pathname: "/evaluate/rank_user_replies", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", + overview: "Given the following User replies, sort them from best to worst, best being first, worst being last.", type: "rank_prompter_replies", update_type: "message_ranking", unchanged_title: "Order Unchanged", @@ -76,6 +83,9 @@ export const TaskTypes: TaskInfo[] = [ desc: "Score prompts given by Open Assistant based on their accuracy and readability.", category: TaskCategory.Evaluate, pathname: "/evaluate/rank_assistant_replies", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", + overview: + "Given the following Assistant replies, sort them from best to worst, best being first, worst being last.", type: "rank_assistant_replies", update_type: "message_ranking", unchanged_title: "Order Unchanged", @@ -86,6 +96,8 @@ export const TaskTypes: TaskInfo[] = [ desc: "Score prompts given by Open Assistant based on their accuracy and readability.", category: TaskCategory.Evaluate, pathname: "/evaluate/rank_initial_prompts", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", + overview: 
"Given the following inital prompts, sort them from best to worst, best being first, worst being last.", type: "rank_initial_prompts", update_type: "message_ranking", unchanged_title: "Order Unchanged", @@ -97,6 +109,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_initial_prompt", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", overview: "Provide labels for the following prompt", type: "label_initial_prompt", mode: "full", @@ -107,6 +120,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_prompter_reply", + help_link: "https://projects.laion.ai/Open-Assistant/docs/tasks/label_prompter_reply", overview: "Given the following discussion, provide labels for the final prompt", type: "label_prompter_reply", mode: "full", @@ -117,6 +131,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_assistant_reply", + help_link: "https://projects.laion.ai/Open-Assistant/docs/tasks/label_assistant_reply", overview: "Given the following discussion, provide labels for the final prompt.", type: "label_assistant_reply", mode: "full", @@ -128,6 +143,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_initial_prompt", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", overview: "Read the following prompt and then answer the question about it.", type: "label_initial_prompt", mode: "simple", @@ -138,6 +154,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_prompter_reply", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", overview: "Read the following conversation and then answer the 
question about the last prompt in the discussion.", type: "label_prompter_reply", mode: "simple", @@ -148,6 +165,7 @@ export const TaskTypes: TaskInfo[] = [ desc: "Provide labels for a prompt.", category: TaskCategory.Label, pathname: "/label/label_assistant_reply", + help_link: "https://projects.laion.ai/Open-Assistant/docs/guides/prompting", overview: "Read the following conversation and then answer the question about the last prompt in the discussion.", type: "label_assistant_reply", mode: "simple", From 8b30c7b68ea3196a87041f892fa619feed5a1220 Mon Sep 17 00:00:00 2001 From: dhug <38571110+danielpatrickhug@users.noreply.github.com> Date: Tue, 17 Jan 2023 02:27:21 -0500 Subject: [PATCH 09/62] add changes for auto_main, tree_manager, and utils/ranking (#786) * add changes for auto_main, tree_manager, and utils/ranking * pre-commit changes Co-authored-by: Alexander Mattick --- backend/oasst_backend/tree_manager.py | 18 ++ backend/oasst_backend/utils/ranking.py | 140 ++++++++++++++ text-frontend/auto_main.py | 250 +++++++++++++++++++++++++ 3 files changed, 408 insertions(+) create mode 100644 backend/oasst_backend/utils/ranking.py create mode 100644 text-frontend/auto_main.py diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index 225b0146..a2c85940 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -13,6 +13,7 @@ from oasst_backend.models import Message, MessageReaction, MessageTreeState, Tas from oasst_backend.prompt_repository import PromptRepository from oasst_backend.utils.database_utils import CommitMode, async_managed_tx_method, managed_tx_method from oasst_backend.utils.hugging_face import HfClassificationModel, HfEmbeddingModel, HfUrl, HuggingFaceAPI +from oasst_backend.utils.ranking import ranked_pairs from oasst_shared.exceptions.oasst_api_error import OasstError, OasstErrorCode from oasst_shared.schemas import protocol as protocol_schema from sqlalchemy.sql import 
text @@ -587,6 +588,7 @@ class TreeManager: self._enter_state(mts, message_tree_state.State.RANKING) return True + @managed_tx_method(CommitMode.COMMIT) def check_condition_for_scoring_state(self, message_tree_id: UUID) -> bool: logger.debug(f"check_condition_for_scoring_state({message_tree_id=})") mts: MessageTreeState @@ -603,8 +605,24 @@ class TreeManager: return False self._enter_state(mts, message_tree_state.State.READY_FOR_SCORING) + self.update_message_ranks(rankings_by_message) return True + @managed_tx_method(CommitMode.COMMIT) + def update_message_ranks(self, rankings_by_message: Dict[int, int]) -> None: + for parent_msg_id, ranking in rankings_by_message.items(): + sorted_messages = [] + for msg_reaction in ranking: + sorted_messages.append(msg_reaction.payload.payload.ranked_message_ids) + logger.debug(f"SORTED MESSAGE {sorted_messages}") + consensus = ranked_pairs(sorted_messages) + logger.debug(f"CONSENSUS: {consensus}\n\n") + for rank, uuid in enumerate(consensus): + # set rank for each message_id for Message rows + msg = self.db.query(Message).filter(Message.id == uuid).one() + msg.rank = rank + self.db.add(msg) + def _calculate_acceptance(self, labels: list[TextLabels]): # calculate acceptance based on spam label return np.mean([1 - l.labels[protocol_schema.TextLabel.spam] for l in labels]) diff --git a/backend/oasst_backend/utils/ranking.py b/backend/oasst_backend/utils/ranking.py new file mode 100644 index 00000000..f6e7a31e --- /dev/null +++ b/backend/oasst_backend/utils/ranking.py @@ -0,0 +1,140 @@ +from typing import List + +import numpy as np + + +def head_to_head_votes(ranks: List[List[int]]): + tallies = np.zeros((len(ranks[0]), len(ranks[0]))) + names = sorted(ranks[0]) + ranks = np.array(ranks) + # we want the sorted indices + ranks = np.argsort(ranks, axis=1) + for i in range(ranks.shape[1]): + for j in range(i + 1, ranks.shape[1]): + # now count the cases someone voted for i over j + over_j = np.sum(ranks[:, i] < ranks[:, j]) + over_i = 
np.sum(ranks[:, j] < ranks[:, i]) + tallies[i, j] = over_j + # tallies[i,j] = over_i + tallies[j, i] = over_i + # tallies[j,i] = over_j + return tallies, names + + +def cycle_detect(pairs): + """Recursively detect cylces by removing condorcet losers until either only one pair is left or condorcet loosers no longer exist + This method upholds the invariant that in a ranking for all a,b either a>b or b>a for all a,b. + + + Returns + ------- + out : False if the pairs do not contain a cycle, True if the pairs contain a cycle + + + """ + # get all condorcet losers (pairs that loose to all other pairs) + # idea: filter all losers that are never winners + # print("pairs", pairs) + if len(pairs) <= 1: + return False + losers = [c_lose for c_lose in np.unique(pairs[:, 1]) if c_lose not in pairs[:, 0]] + if len(losers) == 0: + # if we recursively removed pairs, and at some point we did not have + # a condorcet loser, that means everything is both a winner and loser, + # yielding at least one (winner,loser), (loser,winner) pair + return True + + new = [] + for p in pairs: + if p[1] not in losers: + new.append(p) + return cycle_detect(np.array(new)) + + +def get_winner(pairs): + """ + This returns _one_ concordant winner. + It could be that there are multiple concordant winners, but in our case + since we are interested in a ranking, we have to choose one at random. + """ + losers = np.unique(pairs[:, 1]).astype(int) + winners = np.unique(pairs[:, 0]).astype(int) + for w in winners: + if w not in losers: + return w + + +def get_ranking(pairs): + """ + Abuses concordance property to get a (not necessarily unqiue) ranking. + The lack of uniqueness is due to the potential existence of multiple + equally ranked winners. 
We have to pick one, which is where + the non-uniqueness comes from + """ + if len(pairs) == 1: + return list(pairs[0]) + w = get_winner(pairs) + # now remove the winner from the list of pairs + p_new = np.array([(a, b) for a, b in pairs if a != w]) + return [w] + get_ranking(p_new) + + +def ranked_pairs(ranks: List[List[int]]): + """ + Expects a list of rankings for an item like: + [("w","x","z","y") for _ in range(3)] + + [("w","y","x","z") for _ in range(2)] + + [("x","y","z","w") for _ in range(4)] + + [("x","z","w","y") for _ in range(5)] + + [("y","w","x","z") for _ in range(1)] + This code is quite brain melting, but the idea is the following: + 1. create a head-to-head matrix that tallies up all win-lose combinations of preferences + 2. take all combinations that win more than they loose and sort those by how often they win + 3. use that to create an (implicit) directed graph + 4. recursively extract nodes from the graph that do not have incoming edges + 5. said recursive list is the ranking + """ + tallies, names = head_to_head_votes(ranks) + tallies = tallies - tallies.T + # print(tallies) + # note: the resulting tally matrix should be skew-symmetric + # order by strength of victory (using tideman's original method, don't think it would make a difference for us) + sorted_majorities = [] + for i in range(len(ranks[0])): + for j in range(len(ranks[i])): + if tallies[i, j] > 0: + sorted_majorities.append((i, j, tallies[i, j])) + # we don't explicitly deal with tied majorities here + sorted_majorities = np.array(sorted(sorted_majorities, key=lambda x: x[2], reverse=True)) + # now do lock ins + lock_ins = [] + for (x, y, _) in sorted_majorities: + # invariant: lock_ins has no cycles here + lock_ins.append((x, y)) + # print("lock ins are now",np.array(lock_ins)) + if cycle_detect(np.array(lock_ins)): + # print("backup: cycle detected") + # if there's a cycle, delete the new addition and continue + lock_ins = lock_ins[:-1] + # now simply return all winners in 
order, and attach the losers + # to the back. This is because the overall loser might not be unique + # and (by concordance property) may never exist in any winning set to begin with. + # (otherwise he would either not be the loser, or cycles exist!) + # Since there could be multiple overall losers, we just return them in any order + # as we are unable to find a closer ranking + numerical_ranks = np.array(get_ranking(np.array(lock_ins))).astype(int) + conversion = [names[n] for n in numerical_ranks] + return conversion + + +if __name__ == "__main__": + ranks = ( + [("w", "x", "z", "y") for _ in range(1)] + + [("w", "y", "x", "z") for _ in range(2)] + # + [("x","y","z","w") for _ in range(4)] + + [("x", "z", "w", "y") for _ in range(5)] + + [("y", "w", "x", "z") for _ in range(1)] + # [("y","z","w","x") for _ in range(1000)] + ) + rp = ranked_pairs(ranks) + print(rp) diff --git a/text-frontend/auto_main.py b/text-frontend/auto_main.py new file mode 100644 index 00000000..cea07c1e --- /dev/null +++ b/text-frontend/auto_main.py @@ -0,0 +1,250 @@ +"""Simple REPL frontend.""" + +import http +import random + +import requests +import typer + +app = typer.Typer() + + +# debug constants +USER = {"id": "1234", "display_name": "John Doe", "auth_method": "local"} + + +def _random_message_id(): + return str(random.randint(1000, 9999)) + + +def _render_message(message: dict) -> str: + """Render a message to the user.""" + if message["is_assistant"]: + return f"Assistant: {message['text']}" + return f"Prompter: {message['text']}" + + +@app.command() +def main(backend_url: str = "http://127.0.0.1:8080", api_key: str = "1234"): + """automates tasks""" + + def _post(path: str, json: dict) -> dict: + response = requests.post(f"{backend_url}{path}", json=json, headers={"X-API-Key": api_key}) + response.raise_for_status() + if response.status_code == http.HTTPStatus.NO_CONTENT: + return None + return response.json() + + def gen_random_text(): + return " ".join([random.choice(["hello", 
"world", "foo", "bar"]) for _ in range(10)]) + + def gen_random_ranking(messages): + """rank messages randomly and return list of indexes in order of rank randomly""" + print("Ranking") + print(messages) + print(len(messages)) + ranks = [i for i in range(len(messages))] + shuffled = random.shuffle(ranks) + print(ranks) + print(shuffled) + return ranks + + tasks = [_post("/api/v1/tasks/", {"type": "random", "user": USER})] + q = 0 + while tasks: + task = tasks.pop(0) + print(task) + + match (task["type"]): + case "initial_prompt": + typer.echo("Please provide an initial prompt to the assistant.") + if task["hint"]: + typer.echo(f"Hint: {task['hint']}") + # acknowledge task + message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + + prompt = gen_random_text() + user_message_id = _random_message_id() + # send interaction + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "text_reply_to_message", + "message_id": message_id, + "task_id": task["id"], + "user_message_id": user_message_id, + "text": prompt, + "user": USER, + }, + ) + tasks.append(new_task) + + case "label_initial_prompt": + typer.echo("Label the following prompt:") + typer.echo(task["prompt"]) + # acknowledge task + message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + + valid_labels = task["valid_labels"] + + labels_dict = None + if task["mode"] == "simple" and len(valid_labels) == 1: + answer = random.choice([True, False]) + labels_dict = {valid_labels[0]: 1 if answer else 0} + else: + while labels_dict is None: + labels = random.sample(valid_labels, random.randint(1, len(valid_labels))) + + if all([label in valid_labels for label in labels]): + labels_dict = {label: "1" if label in labels else "0" for label in valid_labels} + else: + invalid_labels = [label for label in labels if label not in valid_labels] + typer.echo(f"Invalid labels: {', '.join(invalid_labels)}. 
Valid: {', '.join(valid_labels)}") + + # send labels + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "text_labels", + "message_id": task["message_id"], + "task_id": task["id"], + "text": task["prompt"], + "labels": labels_dict, + "user": USER, + }, + ) + tasks.append(new_task) + case "prompter_reply": + # acknowledge task + message_id = _random_message_id() + user_message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + # send interaction + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "text_reply_to_message", + "message_id": message_id, + "task_id": task["id"], + "user_message_id": user_message_id, + "text": gen_random_text(), + "user": USER, + }, + ) + tasks.append(new_task) + + case "assistant_reply": + # acknowledge task + message_id = _random_message_id() + user_message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + # send interaction + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "text_reply_to_message", + "message_id": message_id, + "task_id": task["id"], + "user_message_id": user_message_id, + "text": gen_random_text(), + "user": USER, + }, + ) + tasks.append(new_task) + + case "rank_prompter_replies" | "rank_assistant_replies": + # acknowledge task + message_id = _random_message_id() + user_message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + # send interaction + ranking = gen_random_ranking(task["replies"]) + print(ranking) + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "message_ranking", + "message_id": message_id, + "task_id": task["id"], + "ranking": ranking, + "user": USER, + }, + ) + tasks.append(new_task) + + case "rank_initial_prompts": + # acknowledge task + message_id = _random_message_id() + user_message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + # send interaction + 
ranking = gen_random_ranking(task["prompots"]) + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "message_ranking", + "message_id": message_id, + "ranking": ranking, + "user": USER, + }, + ) + tasks.append(new_task) + + case "label_prompter_reply" | "label_assistant_reply": + # acknowledge task + typer.echo("Here is the conversation so far:") + for message in task["conversation"]["messages"]: + typer.echo(_render_message(message)) + + typer.echo("Label the following reply:") + typer.echo(task["reply"]) + message_id = _random_message_id() + user_message_id = _random_message_id() + _post(f"/api/v1/tasks/{task['id']}/ack", {"message_id": message_id}) + valid_labels = task["valid_labels"] + + labels_dict = None + if task["mode"] == "simple" and len(valid_labels) == 1: + answer = random.choice([True, False]) + labels_dict = {valid_labels[0]: 1 if answer else 0} + else: + while labels_dict is None: + labels = random.sample(valid_labels, random.randint(1, len(valid_labels))) + + if all([label in valid_labels for label in labels]): + labels_dict = {label: "1" if label in labels else "0" for label in valid_labels} + else: + invalid_labels = [label for label in labels if label not in valid_labels] + typer.echo(f"Invalid labels: {', '.join(invalid_labels)}. 
Valid: {', '.join(valid_labels)}") + # send interaction + new_task = _post( + "/api/v1/tasks/interaction", + { + "type": "text_labels", + "message_id": task["message_id"], + "task_id": task["id"], + "text": task["reply"], + "labels": labels_dict, + "user": USER, + }, + ) + tasks.append(new_task) + case "task_done": + typer.echo("Task done!") + # rerun with new task slected from above cases + # add a new task + q += 1 + if q == 10: + typer.echo("Task done!") + break + tasks = [_post("/api/v1/tasks/", {"type": "random", "user": USER})] + # + case _: + typer.echo(f"Unknown task type {task['type']}") + # rerun with new task slected from above cases + + +if __name__ == "__main__": + app() From d10f691eeb5a6dca5842d1e7a4bd4b19fa4b8020 Mon Sep 17 00:00:00 2001 From: rjmacarthy Date: Mon, 16 Jan 2023 19:56:44 +0000 Subject: [PATCH 10/62] Implemented i18n for internationalization and refactor index for localization support lint Fix Hero as tag types Fix build warning regarding i18n Update package.json and package-lock.json Revert package-lock.json Add package-lock, fix build Pre-commit Fix default export in favour of named export Refactor cta buttons to use react-icons Remove unused props from CircleBackground --- website/next-i18next.config.js | 6 + website/next.config.js | 3 + website/package-lock.json | 190 ++++++++++++++++-- website/package.json | 2 + website/public/locales/en/common.json | 4 + website/public/locales/en/index.json | 16 ++ .../AnimatedCircles/AnimatedCircles.tsx | 52 +++++ .../src/components/AnimatedCircles/index.tsx | 1 + website/src/components/CallToAction.tsx | 94 ++++----- website/src/components/Faq.tsx | 81 +++----- website/src/components/Hero.tsx | 101 +++------- website/src/pages/_app.tsx | 4 +- website/src/pages/index.tsx | 21 +- 13 files changed, 375 insertions(+), 200 deletions(-) create mode 100644 website/next-i18next.config.js create mode 100644 website/public/locales/en/common.json create mode 100644 website/public/locales/en/index.json 
create mode 100644 website/src/components/AnimatedCircles/AnimatedCircles.tsx create mode 100644 website/src/components/AnimatedCircles/index.tsx diff --git a/website/next-i18next.config.js b/website/next-i18next.config.js new file mode 100644 index 00000000..7c87a7a4 --- /dev/null +++ b/website/next-i18next.config.js @@ -0,0 +1,6 @@ +module.exports = { + i18n: { + defaultLocale: "en", + locales: ["en"], + }, +}; diff --git a/website/next.config.js b/website/next.config.js index 28da824f..a84ce736 100644 --- a/website/next.config.js +++ b/website/next.config.js @@ -1,4 +1,6 @@ /** @type {import('next').NextConfig} */ +const { i18n } = require("./next-i18next.config"); + const nextConfig = { output: "standalone", reactStrictMode: true, @@ -16,6 +18,7 @@ const nextConfig = { */ // scrollRestoration: true, }, + i18n, }; module.exports = nextConfig; diff --git a/website/package-lock.json b/website/package-lock.json index 1fa3d14d..2875bb42 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -33,6 +33,7 @@ "install": "^0.13.0", "next": "13.0.6", "next-auth": "^4.18.6", + "next-i18next": "^13.0.3", "nodemailer": "^6.8.0", "npm": "^9.2.0", "postcss-focus-visible": "^7.1.0", @@ -40,6 +41,7 @@ "react-dom": "18.2.0", "react-feature-flags": "^1.0.0", "react-hook-form": "^7.42.1", + "react-i18next": "^12.1.4", "react-icons": "^4.7.1", "react-table": "^7.8.0", "sharp": "^0.31.3", @@ -12653,6 +12655,15 @@ "@types/unist": "*" } }, + "node_modules/@types/hoist-non-react-statics": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/@types/hoist-non-react-statics/-/hoist-non-react-statics-3.3.1.tgz", + "integrity": "sha512-iMIqiko6ooLrTh1joXodJK5X9xeEALT1kM5G3ZLhD3hszxBdIEd5C75U834D9mLcINgD4OyZf5uQXjkuYydWvA==", + "dependencies": { + "@types/react": "*", + "hoist-non-react-statics": "^3.3.0" + } + }, "node_modules/@types/html-minifier-terser": { "version": "6.1.0", "resolved": 
"https://registry.npmjs.org/@types/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz", @@ -12859,8 +12870,7 @@ "node_modules/@types/prop-types": { "version": "15.7.5", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.5.tgz", - "integrity": "sha512-JCB8C6SnDoQf0cNycqd/35A7MjcnK+ZTqE7judS6o7utxUCg6imJg3QK2qzHKszlTjcj2cn+NwMB2i96ubpj7w==", - "devOptional": true + "integrity": "sha512-JCB8C6SnDoQf0cNycqd/35A7MjcnK+ZTqE7judS6o7utxUCg6imJg3QK2qzHKszlTjcj2cn+NwMB2i96ubpj7w==" }, "node_modules/@types/qs": { "version": "6.9.7", @@ -12872,7 +12882,6 @@ "version": "18.0.26", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.0.26.tgz", "integrity": "sha512-hCR3PJQsAIXyxhTNSiDFY//LhnMZWpNNr5etoCqx/iUfGc5gXWtQR2Phl908jVR6uPXacojQWTg4qRpkxTuGug==", - "devOptional": true, "dependencies": { "@types/prop-types": "*", "@types/scheduler": "*", @@ -12891,8 +12900,7 @@ "node_modules/@types/scheduler": { "version": "0.16.2", "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.2.tgz", - "integrity": "sha512-hppQEBDmlwhFAXKJX2KnWLYu5yMfi91yazPb2l+lbJiwW+wdo1gNeRA+3RgNSO39WYX2euey41KEwnqesU2Jew==", - "devOptional": true + "integrity": "sha512-hppQEBDmlwhFAXKJX2KnWLYu5yMfi91yazPb2l+lbJiwW+wdo1gNeRA+3RgNSO39WYX2euey41KEwnqesU2Jew==" }, "node_modules/@types/semver": { "version": "7.3.13", @@ -16550,7 +16558,6 @@ "version": "3.27.1", "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.27.1.tgz", "integrity": "sha512-GutwJLBChfGCpwwhbYoqfv03LAfmiz7e7D/BNxzeMxwQf10GRSzqiOjx7AmtEk+heiD/JWmBuyBPgFtx0Sg1ww==", - "dev": true, "hasInstallScript": true, "funding": { "type": "opencollective", @@ -21304,6 +21311,14 @@ "node": ">= 12" } }, + "node_modules/html-parse-stringify": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-parse-stringify/-/html-parse-stringify-3.0.1.tgz", + "integrity": "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg==", + "dependencies": { + 
"void-elements": "3.1.0" + } + }, "node_modules/html-tags": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.2.0.tgz", @@ -21446,6 +21461,34 @@ "integrity": "sha512-iimHkHPfIAQ8zCDQLgn08pRqSVioyWvnGfaQ8gond2wf7Jq2jJ+24ykmnRyiz3fIldcn4oUuQXpjqKLhSVR7lw==", "dev": true }, + "node_modules/i18next": { + "version": "22.4.9", + "resolved": "https://registry.npmjs.org/i18next/-/i18next-22.4.9.tgz", + "integrity": "sha512-8gWMmUz460KJDQp/ob3MNUX84cVuDRY9PLFPnV8d+Qezz/6dkjxwOaH70xjrCNDO+JrUL25iXfAIN9wUkInNZw==", + "funding": [ + { + "type": "individual", + "url": "https://locize.com" + }, + { + "type": "individual", + "url": "https://locize.com/i18next.html" + }, + { + "type": "individual", + "url": "https://www.i18next.com/how-to/faq#i18next-is-awesome.-how-can-i-support-the-project" + } + ], + "peer": true, + "dependencies": { + "@babel/runtime": "^7.20.6" + } + }, + "node_modules/i18next-fs-backend": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/i18next-fs-backend/-/i18next-fs-backend-2.1.1.tgz", + "integrity": "sha512-FTnj+UmNgT3YRml5ruRv0jMZDG7odOL/OP5PF5mOqvXud2vHrPOOs68Zdk6iqzL47cnnM0ZVkK2BAvpFeDJToA==" + }, "node_modules/iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -27523,6 +27566,45 @@ } } }, + "node_modules/next-i18next": { + "version": "13.0.3", + "resolved": "https://registry.npmjs.org/next-i18next/-/next-i18next-13.0.3.tgz", + "integrity": "sha512-7AA8J6WbkxRBtSf1+97LSAE7btxWZHsBIJEJ3FuTSBgYtpRiO5NGjcb8XbNAlz6yGU0TtS+yZE+/Wu83KhIT1Q==", + "funding": [ + { + "type": "individual", + "url": "https://locize.com/i18next.html" + }, + { + "type": "individual", + "url": "https://www.i18next.com/how-to/faq#i18next-is-awesome.-how-can-i-support-the-project" + }, + { + "type": "individual", + "url": "https://locize.com" + }, + { + "type": "individual", + "url": "https://github.com/belgattitude" + } + ], + "dependencies": { + "@babel/runtime": 
"^7.20.6", + "@types/hoist-non-react-statics": "^3.3.1", + "core-js": "^3", + "hoist-non-react-statics": "^3.3.2", + "i18next-fs-backend": "^2.1.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "i18next": "^22.0.6", + "next": ">= 12.0.0", + "react": ">= 17.0.2", + "react-i18next": "^12.1.1" + } + }, "node_modules/next/node_modules/postcss": { "version": "8.4.14", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.14.tgz", @@ -32497,6 +32579,27 @@ "react": "^16.8.0 || ^17 || ^18" } }, + "node_modules/react-i18next": { + "version": "12.1.4", + "resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-12.1.4.tgz", + "integrity": "sha512-XQND7jYtgM7ht5PH3yIZljCRpAMTlH/zmngM9ZjToqa+0BR6xuu8c7QF0WIIOEjcMTB2S3iOfpN/xG/ZrAnO6g==", + "dependencies": { + "@babel/runtime": "^7.20.6", + "html-parse-stringify": "^3.0.1" + }, + "peerDependencies": { + "i18next": ">= 19.0.0", + "react": ">= 16.8.0" + }, + "peerDependenciesMeta": { + "react-dom": { + "optional": true + }, + "react-native": { + "optional": true + } + } + }, "node_modules/react-icons": { "version": "4.7.1", "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.7.1.tgz", @@ -36507,6 +36610,14 @@ "integrity": "sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==", "dev": true }, + "node_modules/void-elements": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/void-elements/-/void-elements-3.1.0.tgz", + "integrity": "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/w3c-xmlserializer": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-4.0.0.tgz", @@ -46861,6 +46972,15 @@ "@types/unist": "*" } }, + "@types/hoist-non-react-statics": { + "version": "3.3.1", + "resolved": 
"https://registry.npmjs.org/@types/hoist-non-react-statics/-/hoist-non-react-statics-3.3.1.tgz", + "integrity": "sha512-iMIqiko6ooLrTh1joXodJK5X9xeEALT1kM5G3ZLhD3hszxBdIEd5C75U834D9mLcINgD4OyZf5uQXjkuYydWvA==", + "requires": { + "@types/react": "*", + "hoist-non-react-statics": "^3.3.0" + } + }, "@types/html-minifier-terser": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/@types/html-minifier-terser/-/html-minifier-terser-6.1.0.tgz", @@ -47053,8 +47173,7 @@ "@types/prop-types": { "version": "15.7.5", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.5.tgz", - "integrity": "sha512-JCB8C6SnDoQf0cNycqd/35A7MjcnK+ZTqE7judS6o7utxUCg6imJg3QK2qzHKszlTjcj2cn+NwMB2i96ubpj7w==", - "devOptional": true + "integrity": "sha512-JCB8C6SnDoQf0cNycqd/35A7MjcnK+ZTqE7judS6o7utxUCg6imJg3QK2qzHKszlTjcj2cn+NwMB2i96ubpj7w==" }, "@types/qs": { "version": "6.9.7", @@ -47066,7 +47185,6 @@ "version": "18.0.26", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.0.26.tgz", "integrity": "sha512-hCR3PJQsAIXyxhTNSiDFY//LhnMZWpNNr5etoCqx/iUfGc5gXWtQR2Phl908jVR6uPXacojQWTg4qRpkxTuGug==", - "devOptional": true, "requires": { "@types/prop-types": "*", "@types/scheduler": "*", @@ -47085,8 +47203,7 @@ "@types/scheduler": { "version": "0.16.2", "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.2.tgz", - "integrity": "sha512-hppQEBDmlwhFAXKJX2KnWLYu5yMfi91yazPb2l+lbJiwW+wdo1gNeRA+3RgNSO39WYX2euey41KEwnqesU2Jew==", - "devOptional": true + "integrity": "sha512-hppQEBDmlwhFAXKJX2KnWLYu5yMfi91yazPb2l+lbJiwW+wdo1gNeRA+3RgNSO39WYX2euey41KEwnqesU2Jew==" }, "@types/semver": { "version": "7.3.13", @@ -49959,8 +50076,7 @@ "core-js": { "version": "3.27.1", "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.27.1.tgz", - "integrity": "sha512-GutwJLBChfGCpwwhbYoqfv03LAfmiz7e7D/BNxzeMxwQf10GRSzqiOjx7AmtEk+heiD/JWmBuyBPgFtx0Sg1ww==", - "dev": true + "integrity": 
"sha512-GutwJLBChfGCpwwhbYoqfv03LAfmiz7e7D/BNxzeMxwQf10GRSzqiOjx7AmtEk+heiD/JWmBuyBPgFtx0Sg1ww==" }, "core-js-compat": { "version": "3.27.1", @@ -53680,6 +53796,14 @@ } } }, + "html-parse-stringify": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/html-parse-stringify/-/html-parse-stringify-3.0.1.tgz", + "integrity": "sha512-KknJ50kTInJ7qIScF3jeaFRpMpE8/lfiTdzf/twXyPBLAGrLRTmkz3AdTnKeh40X8k9L2fdYwEp/42WGXIRGcg==", + "requires": { + "void-elements": "3.1.0" + } + }, "html-tags": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/html-tags/-/html-tags-3.2.0.tgz", @@ -53780,6 +53904,20 @@ "integrity": "sha512-iimHkHPfIAQ8zCDQLgn08pRqSVioyWvnGfaQ8gond2wf7Jq2jJ+24ykmnRyiz3fIldcn4oUuQXpjqKLhSVR7lw==", "dev": true }, + "i18next": { + "version": "22.4.9", + "resolved": "https://registry.npmjs.org/i18next/-/i18next-22.4.9.tgz", + "integrity": "sha512-8gWMmUz460KJDQp/ob3MNUX84cVuDRY9PLFPnV8d+Qezz/6dkjxwOaH70xjrCNDO+JrUL25iXfAIN9wUkInNZw==", + "peer": true, + "requires": { + "@babel/runtime": "^7.20.6" + } + }, + "i18next-fs-backend": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/i18next-fs-backend/-/i18next-fs-backend-2.1.1.tgz", + "integrity": "sha512-FTnj+UmNgT3YRml5ruRv0jMZDG7odOL/OP5PF5mOqvXud2vHrPOOs68Zdk6iqzL47cnnM0ZVkK2BAvpFeDJToA==" + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -58397,6 +58535,18 @@ "uuid": "^8.3.2" } }, + "next-i18next": { + "version": "13.0.3", + "resolved": "https://registry.npmjs.org/next-i18next/-/next-i18next-13.0.3.tgz", + "integrity": "sha512-7AA8J6WbkxRBtSf1+97LSAE7btxWZHsBIJEJ3FuTSBgYtpRiO5NGjcb8XbNAlz6yGU0TtS+yZE+/Wu83KhIT1Q==", + "requires": { + "@babel/runtime": "^7.20.6", + "@types/hoist-non-react-statics": "^3.3.1", + "core-js": "^3", + "hoist-non-react-statics": "^3.3.2", + "i18next-fs-backend": "^2.1.0" + } + }, "nice-try": { "version": "1.0.5", "resolved": 
"https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz", @@ -61888,6 +62038,15 @@ "integrity": "sha512-2UIGqwMZksd5HS55crTT1ATLTr0rAI4jS7yVuqTaoRVDhY2Qc4IyjskCmpnmdYqUNOYFy04vW253tb2JRVh+IQ==", "requires": {} }, + "react-i18next": { + "version": "12.1.4", + "resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-12.1.4.tgz", + "integrity": "sha512-XQND7jYtgM7ht5PH3yIZljCRpAMTlH/zmngM9ZjToqa+0BR6xuu8c7QF0WIIOEjcMTB2S3iOfpN/xG/ZrAnO6g==", + "requires": { + "@babel/runtime": "^7.20.6", + "html-parse-stringify": "^3.0.1" + } + }, "react-icons": { "version": "4.7.1", "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.7.1.tgz", @@ -64969,6 +65128,11 @@ "integrity": "sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==", "dev": true }, + "void-elements": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/void-elements/-/void-elements-3.1.0.tgz", + "integrity": "sha512-Dhxzh5HZuiHQhbvTW9AMetFfBHDMYpo23Uo9btPXgdYP+3T5S+p+jgNy7spra+veYhBP2dCSgxR/i2Y02h5/6w==" + }, "w3c-xmlserializer": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-4.0.0.tgz", diff --git a/website/package.json b/website/package.json index 580d0be3..5fd53fa4 100644 --- a/website/package.json +++ b/website/package.json @@ -50,6 +50,7 @@ "install": "^0.13.0", "next": "13.0.6", "next-auth": "^4.18.6", + "next-i18next": "^13.0.3", "nodemailer": "^6.8.0", "npm": "^9.2.0", "postcss-focus-visible": "^7.1.0", @@ -57,6 +58,7 @@ "react-dom": "18.2.0", "react-feature-flags": "^1.0.0", "react-hook-form": "^7.42.1", + "react-i18next": "^12.1.4", "react-icons": "^4.7.1", "react-table": "^7.8.0", "sharp": "^0.31.3", diff --git a/website/public/locales/en/common.json b/website/public/locales/en/common.json new file mode 100644 index 00000000..0b2df79c --- /dev/null +++ b/website/public/locales/en/common.json @@ -0,0 +1,4 @@ +{ + "discord": "Discord", + "github": "GitHub" +} diff 
--git a/website/public/locales/en/index.json b/website/public/locales/en/index.json new file mode 100644 index 00000000..3443e444 --- /dev/null +++ b/website/public/locales/en/index.json @@ -0,0 +1,16 @@ +{ + "title": "Open Assistant", + "subtitle": "Conversational AI for everyone.", + "description": "Conversational AI for everyone. An open source project to create a chat enabled GPT LLM run by LAION and contributors around the world.", + "blurb": "We believe we can create a revolution.", + "blurb1": "In the same way that Stable Diffusion helped the world make art and images in new ways, we want to improve the world by providing amazing conversational AI.", + "join_us_title": "Join us", + "join_us_description": "All open source projects begin with people like you. Open source is the belief that if we collaborate we can together gift our knowledge and technology to the world for the benefit of humanity. Are you in? Find us here:", + "faq_title": "Frequently Asked Questions", + "faq_items": { + "q0": "How far along is this project?", + "a0": "We are in the early stages of development, working from established research in applying RLHF to large language models.", + "q1": "Who is behind Open Assistant?", + "a1": "Open Assistant is a project organized by LAION and individuals around the world interested in bringing this technology to everyone." + } +} diff --git a/website/src/components/AnimatedCircles/AnimatedCircles.tsx b/website/src/components/AnimatedCircles/AnimatedCircles.tsx new file mode 100644 index 00000000..6241f7a6 --- /dev/null +++ b/website/src/components/AnimatedCircles/AnimatedCircles.tsx @@ -0,0 +1,52 @@ +import { Box, useColorMode } from "@chakra-ui/react"; +import React, { useId } from "react"; + +export const AnimatedCircles = () => { + const id = useId(); + const { colorMode } = useColorMode(); + const baseRingColor = colorMode === "light" ? "#d4d4d4" : "#005a69"; + const gradStopColor = colorMode === "light" ? 
"#06b6d4" : "#00f2ff"; + + return ( + + + + + ); +}; diff --git a/website/src/components/AnimatedCircles/index.tsx b/website/src/components/AnimatedCircles/index.tsx new file mode 100644 index 00000000..a5ca582a --- /dev/null +++ b/website/src/components/AnimatedCircles/index.tsx @@ -0,0 +1 @@ +export { AnimatedCircles } from "./AnimatedCircles"; diff --git a/website/src/components/CallToAction.tsx b/website/src/components/CallToAction.tsx index 8a07373f..e374a471 100644 --- a/website/src/components/CallToAction.tsx +++ b/website/src/components/CallToAction.tsx @@ -1,9 +1,14 @@ -import { useColorMode } from "@chakra-ui/react"; +import { Box, Link, Text, useColorMode } from "@chakra-ui/react"; +import { useTranslation } from "next-i18next"; import { useId } from "react"; +import { FaDiscord, FaGithub } from "react-icons/fa"; import { Container } from "./Container"; -function CircleBackground({ width = 558, height = 558, ...props }) { +const CIRCLE_HEIGHT = 558; +const CIRCLE_WIDTH = 558; + +function CircleBackground() { const id = useId(); const { colorMode } = useColorMode(); @@ -11,7 +16,14 @@ function CircleBackground({ width = 558, height = 558, ...props }) { const gradStopColor = colorMode === "light" ? "#fff" : "#000"; return ( -
- -
+ + + + -
-

Join Us

-

- All open source projects begin with people like you. Open source is the belief that if we collaborate we can - together gift our knowledge and technology to the world for the benefit of humanity. Are you in? Find us - here: -

- -
+ +
+
- + ); } diff --git a/website/src/components/Faq.tsx b/website/src/components/Faq.tsx index b8e5e8f8..55bb3585 100644 --- a/website/src/components/Faq.tsx +++ b/website/src/components/Faq.tsx @@ -1,73 +1,42 @@ -import { useColorMode } from "@chakra-ui/react"; +import { Box, List, ListItem, Text, useColorMode } from "@chakra-ui/react"; +import { useTranslation } from "next-i18next"; import { Container } from "./Container"; -const faqs = [ - [ - { - question: "How far along is this project?", - answer: - "We are in the early stages of development, working from established research in applying RLHF to large language models.", - }, - ], - [ - { - question: "Who is behind Open Assistant?", - answer: - "Open Assistant is a project organized by LAION and individuals around the world interested in bringing this technology to everyone.", - }, - ], - [ - // { - // question: 'Where can I learn more?', - // answer: - // 'Please feel free to reach out to us on Discord. We are happy to answer any questions you may have.', - // }, - ], -]; +const FAQS = Array.from({ length: 2 }); export function Faq() { const { colorMode } = useColorMode(); - + const { t } = useTranslation("index"); const headingColorClass = colorMode === "light" ? "text-gray-900" : "text-white"; const textColorClass = colorMode === "light" ? "text-gray-700" : "text-gray-100"; return ( -
+ -
-

- Frequently Asked Questions -

- {/*

- If you have anything else you want to ask,{' '} - - reach out to us - - . -

*/} -
-
    + + {t("faq_title")} + + + - {faqs.map((column, columnIndex) => ( -
  • -
      - {column.map((faq, faqIndex) => ( -
    • -

      {faq.question}

      -

      {faq.answer}

      -
    • - ))} -
    -
  • - ))} -
+ {FAQS.map((_, index) => { + return ( + + + {t(`faq_items.q${index}`)} + + + {t(`faq_items.a${index}`)} + + + ); + })} +
-
+ ); } diff --git a/website/src/components/Hero.tsx b/website/src/components/Hero.tsx index 65c80a42..4605e9e2 100644 --- a/website/src/components/Hero.tsx +++ b/website/src/components/Hero.tsx @@ -1,87 +1,36 @@ -import { useColorMode } from "@chakra-ui/react"; +import { Box, Text, useColorMode } from "@chakra-ui/react"; import Image from "next/image"; -import { useId } from "react"; +import { useTranslation } from "next-i18next"; import { Container } from "./Container"; - -function BackgroundIllustration(props) { - const id = useId(); - - const { colorMode } = useColorMode(); - const baseRingColor = colorMode === "light" ? "#d4d4d4" : "#005a69"; - const gradStopColor = colorMode === "light" ? "#06b6d4" : "#00f2ff"; - - return ( -
- - -
- ); -} +import { AnimatedCircles } from "./AnimatedCircles"; export function Hero() { + const { t } = useTranslation("index"); const { colorMode } = useColorMode(); const pTextColor = colorMode === "light" ? "text-gray-600" : "text-white"; const fancyTextGradientClasses = colorMode === "light" ? "from-blue-600 via-sky-400 to-blue-700" : "from-blue-500 via-sky-300 to-blue-400"; - return ( -
+ -
-
-

Open Assistant

-

+ + + {t("title")} + + - Conversational AI for everyone. -

-

We believe we can create a revolution.

-

- In the same way that Stable Diffusion helped the world make art and images in new ways, we want to improve - the world by providing amazing conversational AI. -

-
- -
- -
+ {t("subtitle")} + + {t("blurb")} + {t("blurb1")} + + + + {""} -
-
-
+
+ + -
+ ); } diff --git a/website/src/pages/_app.tsx b/website/src/pages/_app.tsx index 974226da..adadfe56 100644 --- a/website/src/pages/_app.tsx +++ b/website/src/pages/_app.tsx @@ -3,11 +3,13 @@ import "focus-visible"; import type { AppProps } from "next/app"; import { SessionProvider } from "next-auth/react"; +import { appWithTranslation } from "next-i18next"; import { FlagsProvider } from "react-feature-flags"; import { getDefaultLayout, NextPageWithLayout } from "src/components/Layout"; import flags from "src/flags"; import { SWRConfig, SWRConfiguration } from "swr"; +import nextI18NextConfig from "../../next-i18next.config.js"; import { Chakra, getServerSideProps } from "../styles/Chakra"; type AppPropsWithLayout = AppProps & { @@ -34,4 +36,4 @@ function MyApp({ Component, pageProps: { session, cookies, ...pageProps } }: App ); } export { getServerSideProps }; -export default MyApp; +export default appWithTranslation(MyApp, nextI18NextConfig); diff --git a/website/src/pages/index.tsx b/website/src/pages/index.tsx index 64b1a0d5..8fe5d852 100644 --- a/website/src/pages/index.tsx +++ b/website/src/pages/index.tsx @@ -1,6 +1,9 @@ +import { Box } from "@chakra-ui/react"; import Head from "next/head"; import { useRouter } from "next/router"; import { useSession } from "next-auth/react"; +import { useTranslation } from "next-i18next"; +import { serverSideTranslations } from "next-i18next/serverSideTranslations"; import { useEffect } from "react"; import { CallToAction } from "src/components/CallToAction"; import { Faq } from "src/components/Faq"; @@ -10,6 +13,7 @@ import { getTransparentHeaderLayout } from "src/components/Layout"; const Home = () => { const router = useRouter(); const { status } = useSession(); + const { t } = useTranslation("index"); useEffect(() => { if (status === "authenticated") { router.push("/dashboard"); @@ -19,21 +23,24 @@ const Home = () => { return ( <> - Open Assistant - + {t("title")} + -
+ -
+ ); }; Home.getLayout = getTransparentHeaderLayout; +export const getStaticProps = async ({ locale }) => ({ + props: { + ...(await serverSideTranslations(locale, ["index", "common"])), + }, +}); + export default Home; From 988913943f185a5edcafa6d4aeae94d23df368df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Tue, 17 Jan 2023 09:54:20 +0100 Subject: [PATCH 11/62] use '.value' of str-enums in db queries --- backend/oasst_backend/tree_manager.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index a2c85940..03aac56e 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -617,9 +617,9 @@ class TreeManager: logger.debug(f"SORTED MESSAGE {sorted_messages}") consensus = ranked_pairs(sorted_messages) logger.debug(f"CONSENSUS: {consensus}\n\n") - for rank, uuid in enumerate(consensus): + for rank, message_id in enumerate(consensus): # set rank for each message_id for Message rows - msg = self.db.query(Message).filter(Message.id == uuid).one() + msg = self.pr.fetch_message(message_id=message_id, fail_if_missing=True) msg.rank = rank self.db.add(msg) @@ -639,7 +639,7 @@ class TreeManager: .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, - MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW, + MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW.value, not_(Message.review_result), not_(Message.deleted), Message.review_count < self.cfg.num_reviews_initial_prompt, @@ -664,7 +664,7 @@ class TreeManager: .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, - MessageTreeState.state == message_tree_state.State.GROWING, + MessageTreeState.state == message_tree_state.State.GROWING.value, not_(Message.review_result), 
not_(Message.deleted), Message.review_count < self.cfg.num_reviews_reply, @@ -699,7 +699,7 @@ HAVING COUNT(m.id) > 1 and MIN(m.ranking_count) < :num_required_rankings text(self._sql_find_incomplete_rankings), { "num_required_rankings": self.cfg.num_required_rankings, - "ranking_state": message_tree_state.State.RANKING, + "ranking_state": message_tree_state.State.RANKING.value, }, ) return [IncompleteRankingsRow.from_orm(x) for x in r.all()] @@ -726,7 +726,10 @@ HAVING COUNT(c.id) < mts.max_children_count -- below maximum number of children r = self.db.execute( text(self._sql_find_extendible_parents), - {"growing_state": message_tree_state.State.GROWING, "num_reviews_reply": self.cfg.num_reviews_reply}, + { + "growing_state": message_tree_state.State.GROWING.value, + "num_reviews_reply": self.cfg.num_reviews_reply, + }, ) return [ExtendibleParentRow.from_orm(x) for x in r.all()] @@ -752,7 +755,7 @@ HAVING COUNT(m.id) < mts.goal_tree_size r = self.db.execute( text(self._sql_find_extendible_trees), { - "growing_state": message_tree_state.State.GROWING, + "growing_state": message_tree_state.State.GROWING.value, "num_reviews_reply": self.cfg.num_reviews_reply, }, ) @@ -850,7 +853,7 @@ LEFT JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'Rankin state = message_tree_state.State.INITIAL_PROMPT_REVIEW if tree_size > 1: state = message_tree_state.State.GROWING - logger.info(f"Inserting missing message tree state for message: {id} ({tree_size=}, {state=})") + logger.info(f"Inserting missing message tree state for message: {id} ({tree_size=}, {state=:s})") self._insert_default_state(id, state=state) def query_num_active_trees(self) -> int: From 6e4d8097b66ea61f679fa700438c524169a523e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Tue, 17 Jan 2023 10:15:30 +0100 Subject: [PATCH 12/62] fix availability for label_prompter_reply --- backend/oasst_backend/tree_manager.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 
deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index 03aac56e..9bca0201 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -152,7 +152,7 @@ class TreeManager: task_count_by_type[protocol_schema.TaskRequestType.label_assistant_reply] = len( list(filter(lambda m: m.role == "assistant", replies_need_review)) ) - task_count_by_type[protocol_schema.TaskRequestType.prompter_reply] = len( + task_count_by_type[protocol_schema.TaskRequestType.label_prompter_reply] = len( list(filter(lambda m: m.role == "prompter", replies_need_review)) ) @@ -639,7 +639,7 @@ class TreeManager: .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, - MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW.value, + MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW, not_(Message.review_result), not_(Message.deleted), Message.review_count < self.cfg.num_reviews_initial_prompt, @@ -664,7 +664,7 @@ class TreeManager: .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, - MessageTreeState.state == message_tree_state.State.GROWING.value, + MessageTreeState.state == message_tree_state.State.GROWING, not_(Message.review_result), not_(Message.deleted), Message.review_count < self.cfg.num_reviews_reply, @@ -699,7 +699,7 @@ HAVING COUNT(m.id) > 1 and MIN(m.ranking_count) < :num_required_rankings text(self._sql_find_incomplete_rankings), { "num_required_rankings": self.cfg.num_required_rankings, - "ranking_state": message_tree_state.State.RANKING.value, + "ranking_state": message_tree_state.State.RANKING, }, ) return [IncompleteRankingsRow.from_orm(x) for x in r.all()] @@ -727,7 +727,7 @@ HAVING COUNT(c.id) < mts.max_children_count -- below maximum number of children r = self.db.execute( text(self._sql_find_extendible_parents), { - 
"growing_state": message_tree_state.State.GROWING.value, + "growing_state": message_tree_state.State.GROWING, "num_reviews_reply": self.cfg.num_reviews_reply, }, ) @@ -755,7 +755,7 @@ HAVING COUNT(m.id) < mts.goal_tree_size r = self.db.execute( text(self._sql_find_extendible_trees), { - "growing_state": message_tree_state.State.GROWING.value, + "growing_state": message_tree_state.State.GROWING, "num_reviews_reply": self.cfg.num_reviews_reply, }, ) @@ -924,16 +924,16 @@ if __name__ == "__main__": # print("query_num_active_trees", tm.query_num_active_trees()) # print("query_incomplete_rankings", tm.query_incomplete_rankings()) - # print("query_replies_need_review", tm.query_replies_need_review()) - # print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review()) + print("query_replies_need_review", tm.query_replies_need_review()) + print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review()) # print("query_extendible_trees", tm.query_extendible_trees()) # print("query_extendible_parents", tm.query_extendible_parents()) # print("query_tree_size", tm.query_tree_size(message_tree_id=UUID("bdf434cf-4df5-4b74-949c-a5a157bc3292"))) - print( - "query_reviews_for_message", - tm.query_reviews_for_message(message_id=UUID("6a444493-0d48-4316-a9f1-7e263f5a2473")), - ) + # print( + # "query_reviews_for_message", + # tm.query_reviews_for_message(message_id=UUID("6a444493-0d48-4316-a9f1-7e263f5a2473")), + # ) # print("next_task:", tm.next_task()) From d85cec3d2f3f85f7f1e1d671bb316a2c44e13c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Tue, 17 Jan 2023 10:21:57 +0100 Subject: [PATCH 13/62] extensible -> extendible --- backend/oasst_backend/tree_manager.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index 9bca0201..e31cb70f 100644 --- a/backend/oasst_backend/tree_manager.py +++ 
b/backend/oasst_backend/tree_manager.py @@ -131,7 +131,7 @@ class TreeManager: def _determine_task_availability_internal( self, num_active_trees: int, - extensible_parents: list[ExtendibleParentRow], + extendible_parents: list[ExtendibleParentRow], prompts_need_review: list[Message], replies_need_review: list[Message], incomplete_rankings: list[IncompleteRankingsRow], @@ -142,10 +142,10 @@ class TreeManager: task_count_by_type[protocol_schema.TaskRequestType.initial_prompt] = num_missing_prompts task_count_by_type[protocol_schema.TaskRequestType.prompter_reply] = len( - list(filter(lambda x: x.parent_role == "assistant", extensible_parents)) + list(filter(lambda x: x.parent_role == "assistant", extendible_parents)) ) task_count_by_type[protocol_schema.TaskRequestType.assistant_reply] = len( - list(filter(lambda x: x.parent_role == "prompter", extensible_parents)) + list(filter(lambda x: x.parent_role == "prompter", extendible_parents)) ) task_count_by_type[protocol_schema.TaskRequestType.label_initial_prompt] = len(prompts_need_review) @@ -173,14 +173,14 @@ class TreeManager: def determine_task_availability(self) -> dict[protocol_schema.TaskRequestType, int]: num_active_trees = self.query_num_active_trees() - extensible_parents = self.query_extendible_parents() + extendible_parents = self.query_extendible_parents() prompts_need_review = self.query_prompts_need_review() replies_need_review = self.query_replies_need_review() incomplete_rankings = self.query_incomplete_rankings() return self._determine_task_availability_internal( num_active_trees=num_active_trees, - extensible_parents=extensible_parents, + extendible_parents=extendible_parents, prompts_need_review=prompts_need_review, replies_need_review=replies_need_review, incomplete_rankings=incomplete_rankings, @@ -195,7 +195,7 @@ class TreeManager: num_active_trees = self.query_num_active_trees() prompts_need_review = self.query_prompts_need_review() replies_need_review = self.query_replies_need_review() - 
extensible_parents = self.query_extendible_parents() + extendible_parents = self.query_extendible_parents() incomplete_rankings = self.query_incomplete_rankings() if not self.cfg.rank_prompter_replies: @@ -225,7 +225,7 @@ class TreeManager: else: task_count_by_type = self._determine_task_availability_internal( num_active_trees=num_active_trees, - extensible_parents=extensible_parents, + extendible_parents=extendible_parents, prompts_need_review=prompts_need_review, replies_need_review=replies_need_review, incomplete_rankings=incomplete_rankings, @@ -357,12 +357,12 @@ class TreeManager: case TaskType.REPLY: # select a tree with missing replies if task_role == TaskRole.PROMPTER: - extensible_parents = list(filter(lambda x: x.parent_role == "assistant", extensible_parents)) + extendible_parents = list(filter(lambda x: x.parent_role == "assistant", extendible_parents)) elif task_role == TaskRole.ASSISTANT: - extensible_parents = list(filter(lambda x: x.parent_role == "prompter", extensible_parents)) + extendible_parents = list(filter(lambda x: x.parent_role == "prompter", extendible_parents)) - if len(extensible_parents) > 0: - random_parent = random.choice(extensible_parents) + if len(extendible_parents) > 0: + random_parent = random.choice(extendible_parents) # fetch random conversation to extend logger.debug(f"selected {random_parent=}") From 2ee01d1224b80e1e2bd3c81b9d453a07e60ec94a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Tue, 17 Jan 2023 10:45:52 +0100 Subject: [PATCH 14/62] fix query in TreeManager.query_extendible_parents() --- backend/oasst_backend/tree_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index e31cb70f..bd16fbf1 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -715,8 +715,8 @@ WHERE mts.active -- only consider active trees AND NOT m.deleted -- ignore deleted 
messages as parents AND m.depth < mts.max_depth -- ignore leaf nodes as parents AND m.review_result -- parent node must have positive review - AND NOT c.deleted -- don't count deleted children - AND (c.review_result OR c.review_count < :num_reviews_reply) -- don't count children with negative review but count elements under review + AND NOT coalesce(c.deleted, FALSE) -- don't count deleted children + AND (c.review_result OR coalesce(c.review_count, 0) < :num_reviews_reply) -- don't count children with negative review but count elements under review GROUP BY m.id, m.role, m.depth, m.message_tree_id, mts.max_children_count HAVING COUNT(c.id) < mts.max_children_count -- below maximum number of children """ From 9b6dc6bae2b3e7339764fdd813d38dbb6dcf9b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Tue, 17 Jan 2023 11:01:36 +0100 Subject: [PATCH 15/62] fix join types in TreeManager --- backend/oasst_backend/tree_manager.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index bd16fbf1..265591c1 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -636,7 +636,7 @@ class TreeManager: qry = ( self.db.query(Message) .select_from(MessageTreeState) - .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) + .join(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, MessageTreeState.state == message_tree_state.State.INITIAL_PROMPT_REVIEW, @@ -661,7 +661,7 @@ class TreeManager: qry = ( self.db.query(Message) .select_from(MessageTreeState) - .outerjoin(Message, MessageTreeState.message_tree_id == Message.message_tree_id) + .join(Message, MessageTreeState.message_tree_id == Message.message_tree_id) .filter( MessageTreeState.active, MessageTreeState.state == message_tree_state.State.GROWING, @@ -682,7 +682,7 @@ class 
TreeManager: SELECT m.parent_id, m.role, COUNT(m.id) children_count, MIN(m.ranking_count) child_min_ranking_count, COUNT(m.id) FILTER (WHERE m.ranking_count >= :num_required_rankings) as completed_rankings FROM message_tree_state mts - LEFT JOIN message m ON mts.message_tree_id = m.message_tree_id + INNER JOIN message m ON mts.message_tree_id = m.message_tree_id WHERE mts.active -- only consider active trees AND mts.state = :ranking_state -- message tree must be in ranking state AND m.review_result -- must be reviewed @@ -708,7 +708,7 @@ HAVING COUNT(m.id) > 1 and MIN(m.ranking_count) < :num_required_rankings -- find all extendible parent nodes SELECT m.id as parent_id, m.role as parent_role, m.depth, m.message_tree_id, COUNT(c.id) active_children_count FROM message_tree_state mts - LEFT JOIN message m ON mts.message_tree_id = m.message_tree_id -- all elements of message tree + INNER JOIN message m ON mts.message_tree_id = m.message_tree_id -- all elements of message tree LEFT JOIN message c ON m.id = c.parent_id -- child nodes WHERE mts.active -- only consider active trees AND mts.state = :growing_state -- message tree must be growing @@ -738,8 +738,8 @@ HAVING COUNT(c.id) < mts.max_children_count -- below maximum number of children SELECT m.message_tree_id, mts.goal_tree_size, COUNT(m.id) AS tree_size FROM ( SELECT DISTINCT message_tree_id FROM ({_sql_find_extendible_parents}) extendible_parents - ) trees LEFT JOIN message_tree_state mts ON trees.message_tree_id = mts.message_tree_id - LEFT JOIN message m ON mts.message_tree_id = m.message_tree_id + ) trees INNER JOIN message_tree_state mts ON trees.message_tree_id = mts.message_tree_id + INNER JOIN message m ON mts.message_tree_id = m.message_tree_id WHERE NOT m.deleted AND ( m.parent_id IS NOT NULL AND (m.review_result OR m.review_count < :num_reviews_reply) -- children @@ -787,7 +787,7 @@ HAVING COUNT(m.id) < mts.goal_tree_size """Find all initial prompt messages that have no associated message tree state""" 
qry_missing_tree_states = ( self.db.query(Message.id) - .join(MessageTreeState, isouter=True) + .outerjoin(MessageTreeState, Message.message_tree_id == MessageTreeState.message_tree_id) .filter( Message.parent_id.is_(None), Message.message_tree_id == Message.id, @@ -804,7 +804,7 @@ SELECT p.parent_id, mr.* FROM -- find parents with > 1 children SELECT m.parent_id, m.message_tree_id, COUNT(m.id) children_count FROM message_tree_state mts - LEFT JOIN message m ON mts.message_tree_id = m.message_tree_id + INNER JOIN message m ON mts.message_tree_id = m.message_tree_id WHERE m.review_result -- must be reviewed AND NOT m.deleted -- not deleted AND m.parent_id IS NOT NULL -- ignore initial prompts @@ -813,8 +813,8 @@ SELECT p.parent_id, mr.* FROM GROUP BY m.parent_id, m.message_tree_id HAVING COUNT(m.id) > 1 ) as p -LEFT JOIN task t ON p.parent_id = t.parent_message_id AND t.done AND (t.payload_type = 'RankPrompterRepliesPayload' OR t.payload_type = 'RankAssistantRepliesPayload') -LEFT JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'RankingReactionPayload' +INNER JOIN task t ON p.parent_id = t.parent_message_id AND t.done AND (t.payload_type = 'RankPrompterRepliesPayload' OR t.payload_type = 'RankAssistantRepliesPayload') +INNER JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'RankingReactionPayload' """ def query_tree_ranking_results( @@ -925,7 +925,7 @@ if __name__ == "__main__": # print("query_num_active_trees", tm.query_num_active_trees()) # print("query_incomplete_rankings", tm.query_incomplete_rankings()) print("query_replies_need_review", tm.query_replies_need_review()) - print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review()) + # print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review()) # print("query_extendible_trees", tm.query_extendible_trees()) # print("query_extendible_parents", tm.query_extendible_parents()) # print("query_tree_size", 
tm.query_tree_size(message_tree_id=UUID("bdf434cf-4df5-4b74-949c-a5a157bc3292"))) From ab90d7810acbf9a52b27d8707377a506c8409506 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 11:13:43 +0100 Subject: [PATCH 16/62] added postgres dockerfile --- .github/workflows/build-postgres.yaml | 17 +++++++++++++++++ docker/Dockerfile.postgres | 9 +++++++++ 2 files changed, 26 insertions(+) create mode 100644 .github/workflows/build-postgres.yaml create mode 100644 docker/Dockerfile.postgres diff --git a/.github/workflows/build-postgres.yaml b/.github/workflows/build-postgres.yaml new file mode 100644 index 00000000..6f6595f7 --- /dev/null +++ b/.github/workflows/build-postgres.yaml @@ -0,0 +1,17 @@ +name: Build OASST Postgres image + +on: + push: + branches: + - main + paths: + - docker/Dockerfile.postgres + +jobs: + build-postgres: + uses: ./.github/workflows/docker-build.yaml + with: + image-name: oasst-postgres + context: ./docker/ + dockerfile: docker/Dockerfile.postgres + build-args: "" \ No newline at end of file diff --git a/docker/Dockerfile.postgres b/docker/Dockerfile.postgres new file mode 100644 index 00000000..247dc46f --- /dev/null +++ b/docker/Dockerfile.postgres @@ -0,0 +1,9 @@ +FROM postgres + +# install unzip +RUN apt-get update && apt-get install -y unzip curl && rm -rf /var/lib/apt/lists/* + +# download aws cli +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +RUN unzip awscliv2.zip +RUN ./aws/install From 4285438fe719f82664be131288f6a9ee20606e89 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 11:15:35 +0100 Subject: [PATCH 17/62] added postgres dockerfile --- docker/Dockerfile.postgres | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile.postgres b/docker/Dockerfile.postgres index 247dc46f..0eb4fec0 100644 --- a/docker/Dockerfile.postgres +++ b/docker/Dockerfile.postgres @@ -5,5 +5,5 @@ RUN apt-get update && apt-get install -y unzip curl && rm -rf 
/var/lib/apt/lists # download aws cli RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" -RUN unzip awscliv2.zip +RUN unzip -q awscliv2.zip RUN ./aws/install From 137f733fef9539d77d0f5a87cd41239697b4793a Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 11:30:52 +0100 Subject: [PATCH 18/62] added oasst postgres --- ansible/deploy-to-node.yaml | 33 ++++++++------------------------- docker-compose.yaml | 3 ++- docker/Dockerfile.postgres | 2 +- 3 files changed, 11 insertions(+), 27 deletions(-) diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index 4bb13bb6..bbadca6a 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -57,8 +57,9 @@ - name: Create postgres containers community.docker.docker_container: name: "oasst-{{ stack_name }}-postgres-{{ item.name }}" - image: postgres:15 + image: ghcr.io/laion-ai/open-assistant/oasst-postgres state: started + pull: true recreate: "{{ (stack_name == 'dev') | bool }}" restart_policy: always network_mode: "oasst-{{ stack_name }}" @@ -66,6 +67,12 @@ POSTGRES_USER: postgres POSTGRES_PASSWORD: "{{ postgres_password }}" POSTGRES_DB: postgres + S3_BUCKET_NAME: + "{{ lookup('ansible.builtin.env', 'S3_BUCKET_NAME') }}" + AWS_ACCESS_KEY_ID: + "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY') }}" + AWS_SECRET_ACCESS_KEY: + "{{ lookup('ansible.builtin.env', 'AWS_SECRET_KEY') }}" volumes: - "oasst-{{ stack_name }}-postgres-{{ item.name }}:/var/lib/postgresql/data" @@ -78,30 +85,6 @@ - name: backend - name: web - - name: Copy pgbackrest.conf to managed node - ansible.builtin.copy: - src: ./pgbackrest.conf - dest: "./{{ stack_name }}/pgbackrest.conf" - mode: 0644 - - - name: Create pgbackrest container - community.docker.docker_container: - name: "oasst-{{ stack_name }}-pgbackrest" - image: woblerr/pgbackrest:2.43 - state: "{{ 'started' if stack_name == 'production' else 'absent' }}" - network_mode: "oasst-{{ stack_name }}" - entrypoint: 
["/bin/bash", "-c", "sleep infinity"] - volumes: - - "./{{ stack_name }}/pgbackrest.conf:/etc/pgbackrest/pgbackrest.conf" - - "oasst-{{ stack_name }}-postgres-backend:/var/lib/postgresql/data" - env: - PGBACKREST_REPO1_S3_BUCKET: - "{{ lookup('ansible.builtin.env', 'S3_BUCKET_NAME') }}" - PGBACKREST_REPO1_S3_KEY: - "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY') }}" - PGBACKREST_REPO1_S3_KEY_SECRET: - "{{ lookup('ansible.builtin.env', 'AWS_SECRET_KEY') }}" - - name: Run the oasst oasst-backend community.docker.docker_container: name: "oasst-{{ stack_name }}-backend" diff --git a/docker-compose.yaml b/docker-compose.yaml index 78192eb3..908457cd 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -18,7 +18,8 @@ services: # This DB is for the FastAPI Backend. db: - image: postgres + image: ghcr.io/laion-ai/open-assistant/oasst-postgres + pull_policy: always restart: always ports: - 5432:5432 diff --git a/docker/Dockerfile.postgres b/docker/Dockerfile.postgres index 0eb4fec0..b5b98c07 100644 --- a/docker/Dockerfile.postgres +++ b/docker/Dockerfile.postgres @@ -1,4 +1,4 @@ -FROM postgres +FROM postgres:15 # install unzip RUN apt-get update && apt-get install -y unzip curl && rm -rf /var/lib/apt/lists/* From db8fedf01089c4c0cd802274661f980bc1cc39c1 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:02:10 +0100 Subject: [PATCH 19/62] added region to aws --- ansible/deploy-to-node.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index bbadca6a..e8cd9757 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -73,6 +73,8 @@ "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY') }}" AWS_SECRET_ACCESS_KEY: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_KEY') }}" + AWS_DEFAULT_REGION: + "{{ lookup('ansible.builtin.env', 'S3_REGION') }}" volumes: - "oasst-{{ stack_name }}-postgres-{{ item.name }}:/var/lib/postgresql/data" From 
ca9af52c512ab48e28d856e8e9716c6cc8806b81 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:06:08 +0100 Subject: [PATCH 20/62] added region to aws --- .github/workflows/deploy-to-node.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy-to-node.yaml b/.github/workflows/deploy-to-node.yaml index f107d0af..f1b4558a 100644 --- a/.github/workflows/deploy-to-node.yaml +++ b/.github/workflows/deploy-to-node.yaml @@ -34,6 +34,7 @@ jobs: WEB_EMAIL_SERVER_USER: ${{ secrets.DEV_WEB_EMAIL_SERVER_USER }} WEB_NEXTAUTH_SECRET: ${{ secrets.NEXTAUTH_SECRET }} S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }} + S3_REGION: ${{ secrets.S3_REGION }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} steps: From 4379aadd877c260fa0b2e22b72f42ab5f28e42c6 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:21:16 +0100 Subject: [PATCH 21/62] added backup script --- docker/Dockerfile.postgres | 2 ++ docker/backup_pg_to_s3.sh | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100755 docker/backup_pg_to_s3.sh diff --git a/docker/Dockerfile.postgres b/docker/Dockerfile.postgres index b5b98c07..b78e5e3b 100644 --- a/docker/Dockerfile.postgres +++ b/docker/Dockerfile.postgres @@ -7,3 +7,5 @@ RUN apt-get update && apt-get install -y unzip curl && rm -rf /var/lib/apt/lists RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" RUN unzip -q awscliv2.zip RUN ./aws/install + +COPY ./backup_pg_to_s3.sh . 
\ No newline at end of file diff --git a/docker/backup_pg_to_s3.sh b/docker/backup_pg_to_s3.sh new file mode 100755 index 00000000..b3f4ab84 --- /dev/null +++ b/docker/backup_pg_to_s3.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -e +set -x + +# filename with timestamp +filename="postgres-$(date +%Y-%m-%d_%H-%M-%S).sql" + +# perform pg_dump +pg_dump -U postgres postgres > /tmp/$filename + +# upload to s3 +aws s3 cp /tmp/$filename s3://$S3_BUCKET_NAME/$filename \ No newline at end of file From 8b291fb29ab9c8b78f38a58403e9d39514634f40 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:22:01 +0100 Subject: [PATCH 22/62] fixed pre-commit --- docker/Dockerfile.postgres | 2 +- docker/backup_pg_to_s3.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile.postgres b/docker/Dockerfile.postgres index b78e5e3b..5c4aad80 100644 --- a/docker/Dockerfile.postgres +++ b/docker/Dockerfile.postgres @@ -8,4 +8,4 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2 RUN unzip -q awscliv2.zip RUN ./aws/install -COPY ./backup_pg_to_s3.sh . \ No newline at end of file +COPY ./backup_pg_to_s3.sh . 
diff --git a/docker/backup_pg_to_s3.sh b/docker/backup_pg_to_s3.sh index b3f4ab84..86ef1691 100755 --- a/docker/backup_pg_to_s3.sh +++ b/docker/backup_pg_to_s3.sh @@ -10,4 +10,4 @@ filename="postgres-$(date +%Y-%m-%d_%H-%M-%S).sql" pg_dump -U postgres postgres > /tmp/$filename # upload to s3 -aws s3 cp /tmp/$filename s3://$S3_BUCKET_NAME/$filename \ No newline at end of file +aws s3 cp /tmp/$filename s3://$S3_BUCKET_NAME/$filename From 1b41f8bbfc67febc947d5cba98d84cedba1e37c8 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:22:10 +0100 Subject: [PATCH 23/62] fixed pre-commit --- ansible/deploy-to-node.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index e8cd9757..f1cfe001 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -73,8 +73,7 @@ "{{ lookup('ansible.builtin.env', 'AWS_ACCESS_KEY') }}" AWS_SECRET_ACCESS_KEY: "{{ lookup('ansible.builtin.env', 'AWS_SECRET_KEY') }}" - AWS_DEFAULT_REGION: - "{{ lookup('ansible.builtin.env', 'S3_REGION') }}" + AWS_DEFAULT_REGION: "{{ lookup('ansible.builtin.env', 'S3_REGION') }}" volumes: - "oasst-{{ stack_name }}-postgres-{{ item.name }}:/var/lib/postgresql/data" From 8ed80525c1428e4d73782980c574e383c9eec3a3 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:23:42 +0100 Subject: [PATCH 24/62] updated backup script --- docker/backup_pg_to_s3.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/backup_pg_to_s3.sh b/docker/backup_pg_to_s3.sh index 86ef1691..ff509947 100755 --- a/docker/backup_pg_to_s3.sh +++ b/docker/backup_pg_to_s3.sh @@ -11,3 +11,5 @@ pg_dump -U postgres postgres > /tmp/$filename # upload to s3 aws s3 cp /tmp/$filename s3://$S3_BUCKET_NAME/$filename + +rm /tmp/$filename From 3749791bcecb49342bcc3051255954cb9139503c Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 12:25:26 +0100 Subject: [PATCH 25/62] moved oasst-postgres --- 
.github/workflows/build-postgres.yaml | 6 +++--- docker/{Dockerfile.postgres => oasst-postgres/Dockerfile} | 0 docker/{ => oasst-postgres}/backup_pg_to_s3.sh | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename docker/{Dockerfile.postgres => oasst-postgres/Dockerfile} (100%) rename docker/{ => oasst-postgres}/backup_pg_to_s3.sh (100%) diff --git a/.github/workflows/build-postgres.yaml b/.github/workflows/build-postgres.yaml index 6f6595f7..2522a1d7 100644 --- a/.github/workflows/build-postgres.yaml +++ b/.github/workflows/build-postgres.yaml @@ -5,13 +5,13 @@ on: branches: - main paths: - - docker/Dockerfile.postgres + - docker/oasst-postgres/** jobs: build-postgres: uses: ./.github/workflows/docker-build.yaml with: image-name: oasst-postgres - context: ./docker/ - dockerfile: docker/Dockerfile.postgres + context: ./docker/oasst-postgres + dockerfile: docker/oasst-postgres/Dockerfile build-args: "" \ No newline at end of file diff --git a/docker/Dockerfile.postgres b/docker/oasst-postgres/Dockerfile similarity index 100% rename from docker/Dockerfile.postgres rename to docker/oasst-postgres/Dockerfile diff --git a/docker/backup_pg_to_s3.sh b/docker/oasst-postgres/backup_pg_to_s3.sh similarity index 100% rename from docker/backup_pg_to_s3.sh rename to docker/oasst-postgres/backup_pg_to_s3.sh From 59997a573f103b976b678f07ec5ce100babd4cd6 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 14:29:00 +0100 Subject: [PATCH 26/62] deploy trigger From 0f896d910e0074d5095ef953ba26bb058ebb613a Mon Sep 17 00:00:00 2001 From: Andreas Koepf Date: Tue, 17 Jan 2023 17:50:17 +0000 Subject: [PATCH 27/62] make sure we enter READY_FOR_EXPORT after ranking --- backend/oasst_backend/tree_manager.py | 60 +++++++++++++++++---------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index 265591c1..2f48bca0 100644 --- a/backend/oasst_backend/tree_manager.py +++ 
b/backend/oasst_backend/tree_manager.py @@ -489,7 +489,8 @@ class TreeManager: _, task = pr.store_ranking(interaction) - self.check_condition_for_scoring_state(task.message_tree_id) + ok, rankings_by_message = self.check_condition_for_scoring_state(task.message_tree_id) + self.update_message_ranks(task.message_tree_id, rankings_by_message) case protocol_schema.TextLabels: logger.info( @@ -589,39 +590,56 @@ class TreeManager: return True @managed_tx_method(CommitMode.COMMIT) - def check_condition_for_scoring_state(self, message_tree_id: UUID) -> bool: + def check_condition_for_scoring_state( + self, message_tree_id: UUID + ) -> Tuple[bool, dict[UUID, list[MessageReaction]]]: logger.debug(f"check_condition_for_scoring_state({message_tree_id=})") - mts: MessageTreeState - mts = self.db.query(MessageTreeState).filter(MessageTreeState.message_tree_id == message_tree_id).one() + + mts = self.pr.fetch_tree_state(message_tree_id) if not mts.active or mts.state != message_tree_state.State.RANKING: logger.debug(f"False {mts.active=}, {mts.state=}") - return False + return False, None ranking_role_filter = None if self.cfg.rank_prompter_replies else "assistant" rankings_by_message = self.query_tree_ranking_results(message_tree_id, role_filter=ranking_role_filter) for parent_msg_id, ranking in rankings_by_message.items(): if len(ranking) < self.cfg.num_required_rankings: logger.debug(f"False {parent_msg_id=} {len(ranking)=}") - return False + return False, None self._enter_state(mts, message_tree_state.State.READY_FOR_SCORING) - self.update_message_ranks(rankings_by_message) - return True + return True, rankings_by_message @managed_tx_method(CommitMode.COMMIT) - def update_message_ranks(self, rankings_by_message: Dict[int, int]) -> None: - for parent_msg_id, ranking in rankings_by_message.items(): - sorted_messages = [] - for msg_reaction in ranking: - sorted_messages.append(msg_reaction.payload.payload.ranked_message_ids) - logger.debug(f"SORTED MESSAGE {sorted_messages}") - 
consensus = ranked_pairs(sorted_messages) - logger.debug(f"CONSENSUS: {consensus}\n\n") - for rank, message_id in enumerate(consensus): - # set rank for each message_id for Message rows - msg = self.pr.fetch_message(message_id=message_id, fail_if_missing=True) - msg.rank = rank - self.db.add(msg) + def update_message_ranks(self, message_tree_id: UUID, rankings_by_message: Dict[int, int]) -> bool: + + mts = self.pr.fetch_tree_state(message_tree_id) + # check state, allow retry if in SCORING_FAILED state + if mts.state not in (message_tree_state.State.READY_FOR_SCORING, message_tree_state.State.SCORING_FAILED): + logger.debug(f"False {mts.active=}, {mts.state=}") + return False + + try: + for rankings in rankings_by_message.values(): + sorted_messages = [] + for msg_reaction in rankings: + sorted_messages.append(msg_reaction.payload.payload.ranked_message_ids) + logger.debug(f"SORTED MESSAGE {sorted_messages}") + consensus = ranked_pairs(sorted_messages) + logger.debug(f"CONSENSUS: {consensus}\n\n") + for rank, message_id in enumerate(consensus): + # set rank for each message_id for Message rows + msg = self.pr.fetch_message(message_id=message_id, fail_if_missing=True) + msg.rank = rank + self.db.add(msg) + + except Exception: + logger.exception(f"update_message_ranks({message_tree_id=}) failed") + self._enter_state(mts, message_tree_state.State.SCORING_FAILED) + return False + + self._enter_state(mts, message_tree_state.State.READY_FOR_EXPORT) + return True def _calculate_acceptance(self, labels: list[TextLabels]): # calculate acceptance based on spam label From 6b82b2cd13bf90f7b20bac2dd2f650b5fedbde13 Mon Sep 17 00:00:00 2001 From: dhug <38571110+danielpatrickhug@users.noreply.github.com> Date: Tue, 17 Jan 2023 15:07:39 -0500 Subject: [PATCH 28/62] Add api auth to user stat repo (#805) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added api_auth to user_stats_repository * pre_commit changes * Remove call with dev uuid 
Co-authored-by: Andreas Köpf --- backend/oasst_backend/user_stats_repository.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/backend/oasst_backend/user_stats_repository.py b/backend/oasst_backend/user_stats_repository.py index bdd0e2e9..fe466bfa 100644 --- a/backend/oasst_backend/user_stats_repository.py +++ b/backend/oasst_backend/user_stats_repository.py @@ -4,6 +4,7 @@ from uuid import UUID import sqlalchemy as sa from loguru import logger +from oasst_backend.config import settings from oasst_backend.models import Message, MessageReaction, Task, User, UserStats, UserStatsTimeFrame from oasst_backend.models.db_payload import ( LabelAssistantReplyPayload, @@ -291,13 +292,11 @@ WHERE if __name__ == "__main__": - from oasst_backend.api.deps import get_dummy_api_client + from oasst_backend.api.deps import api_auth from oasst_backend.database import engine - with Session(engine) as session: - api_client = get_dummy_api_client(session) - usr = UserStatsRepository(session) - # usr.update_all_time_frames() - # session.commit() - # usr.get_leader_board(UserStatsTimeFrame.total) - usr.get_user_stats_all_time_frames(UUID("0d6ff62a-0bea-4c56-ade8-b3e0520a10ce")) + with Session(engine) as db: + api_client = api_auth(settings.OFFICIAL_WEB_API_KEY, db=db) + usr = UserStatsRepository(db) + usr.update_all_time_frames() + db.commit() From 0a36e459d7b08aa67200c57316513bad12e764ec Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Wed, 18 Jan 2023 05:12:48 +0900 Subject: [PATCH 29/62] Guarantee all users get a name (#800) * Guarantee all users get a name and it is used when talking to the backend * Small code cleanup --- website/package-lock.json | 11 +++++++++++ website/package.json | 1 + website/src/lib/oasst_api_client.ts | 4 ++-- website/src/pages/api/auth/[...nextauth].ts | 19 ++++++++++++++++--- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/website/package-lock.json b/website/package-lock.json index 1fa3d14d..68ef2ba8 
100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -45,6 +45,7 @@ "sharp": "^0.31.3", "swr": "^2.0.0", "tailwindcss": "^3.2.4", + "unique-username-generator": "^1.1.3", "use-debounce": "^9.0.2" }, "devDependencies": { @@ -35987,6 +35988,11 @@ "imurmurhash": "^0.1.4" } }, + "node_modules/unique-username-generator": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/unique-username-generator/-/unique-username-generator-1.1.3.tgz", + "integrity": "sha512-TB6YdqPMKMpTSgxAzjZkKWtmpZPHvARoWreCKBpc1UrLFz/0C6Q96/qdjpLr9OXPCHk16sD1LHjTr3JDj7q2JA==" + }, "node_modules/unist-builder": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/unist-builder/-/unist-builder-2.0.3.tgz", @@ -64596,6 +64602,11 @@ "imurmurhash": "^0.1.4" } }, + "unique-username-generator": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/unique-username-generator/-/unique-username-generator-1.1.3.tgz", + "integrity": "sha512-TB6YdqPMKMpTSgxAzjZkKWtmpZPHvARoWreCKBpc1UrLFz/0C6Q96/qdjpLr9OXPCHk16sD1LHjTr3JDj7q2JA==" + }, "unist-builder": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/unist-builder/-/unist-builder-2.0.3.tgz", diff --git a/website/package.json b/website/package.json index 580d0be3..2e46d187 100644 --- a/website/package.json +++ b/website/package.json @@ -62,6 +62,7 @@ "sharp": "^0.31.3", "swr": "^2.0.0", "tailwindcss": "^3.2.4", + "unique-username-generator": "^1.1.3", "use-debounce": "^9.0.2" }, "devDependencies": { diff --git a/website/src/lib/oasst_api_client.ts b/website/src/lib/oasst_api_client.ts index fb11adec..d48a987c 100644 --- a/website/src/lib/oasst_api_client.ts +++ b/website/src/lib/oasst_api_client.ts @@ -113,7 +113,7 @@ export class OasstApiClient { type: taskType, user: { id: userToken.sub, - display_name: userToken.name || userToken.email, + display_name: userToken.name, auth_method: "local", }, }); @@ -146,7 +146,7 @@ export class OasstApiClient { type: updateType, user: { id: userToken.sub, - 
display_name: userToken.name || userToken.email, + display_name: userToken.name, auth_method: "local", }, task_id: taskId, diff --git a/website/src/pages/api/auth/[...nextauth].ts b/website/src/pages/api/auth/[...nextauth].ts index 691cbcba..c718ddce 100644 --- a/website/src/pages/api/auth/[...nextauth].ts +++ b/website/src/pages/api/auth/[...nextauth].ts @@ -7,6 +7,7 @@ import CredentialsProvider from "next-auth/providers/credentials"; import DiscordProvider from "next-auth/providers/discord"; import EmailProvider from "next-auth/providers/email"; import prisma from "src/lib/prismadb"; +import { generateUsername } from "unique-username-generator"; const providers: Provider[] = []; @@ -97,10 +98,11 @@ export const authOptions: AuthOptions = { * This let's use forward the role to the session object. */ async jwt({ token }) { - const { isNew, role } = await prisma.user.findUnique({ + const { isNew, name, role } = await prisma.user.findUnique({ where: { id: token.sub }, - select: { role: true, isNew: true }, + select: { name: true, role: true, isNew: true }, }); + token.name = name; token.role = role; token.isNew = isNew; return token; @@ -110,7 +112,18 @@ export const authOptions: AuthOptions = { /** * Update the user's role after they have successfully signed in */ - async signIn({ user, account }) { + async signIn({ user, account, isNewUser }) { + if (isNewUser && account.provider === "email") { + await prisma.user.update({ + data: { + name: generateUsername(), + }, + where: { + id: user.id, + }, + }); + } + // Get the admin list for the user's auth type. 
const adminForAccountType = adminUserMap.get(account.provider); From 8adc1da6f91737833260b5312c1fad2328c4a4be Mon Sep 17 00:00:00 2001 From: Oliver Date: Tue, 17 Jan 2023 20:22:19 +0000 Subject: [PATCH 30/62] Add notebooks for CodeT datasets augmentation --- .../codet-data/Augment_CodeT_codegen.ipynb | 189 ++++++++++++++++++ .../codet-data/Augment_CodeT_testgen.ipynb | 182 +++++++++++++++++ .../data-augmentation/codet-data/README.md | 15 ++ 3 files changed, 386 insertions(+) create mode 100644 notebooks/data-augmentation/codet-data/Augment_CodeT_codegen.ipynb create mode 100644 notebooks/data-augmentation/codet-data/Augment_CodeT_testgen.ipynb create mode 100644 notebooks/data-augmentation/codet-data/README.md diff --git a/notebooks/data-augmentation/codet-data/Augment_CodeT_codegen.ipynb b/notebooks/data-augmentation/codet-data/Augment_CodeT_codegen.ipynb new file mode 100644 index 00000000..99fa7ed0 --- /dev/null +++ b/notebooks/data-augmentation/codet-data/Augment_CodeT_codegen.ipynb @@ -0,0 +1,189 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains code to parse CodeT code generation prompt and solution data and modify to `(prompt, solution)` pairs outputted in a `.jsonl` file.\n", + "\n", + "Requirements: `requests`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "import requests\n", + "from typing import Dict, List, Tuple" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_FILES: List[str] = [\n", + " \"HumanEval_for_code_generation.jsonl\",\n", + " \"mbpp_sanitized_for_code_generation.jsonl\",\n", + "]\n", + "\n", + "OUT_FILES: List[str] = [\n", + " \"HumanEval_codegen.jsonl\",\n", + " \"mbpp_codegen.jsonl\",\n", + "]\n", + "\n", + "FILE_PATHS: List[Path] = [Path(f\"data/{data_file}\") for data_file in DATA_FILES]\n", + "\n", 
+ "OUT_PATHS: List[Path] = [Path(f\"data/augmented/{out_file}\") for out_file in OUT_FILES]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def download_file(filename: str):\n", + " url = f\"https://raw.githubusercontent.com/microsoft/CodeT/main/CodeT/data/dataset/{filename}\"\n", + " response = requests.get(url)\n", + " with open(f\"data/{filename}\", \"wb\") as f:\n", + " f.write(response.content)\n", + "\n", + "\n", + "for filename in DATA_FILES:\n", + " download_file(filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can find the docstring, use its contents as the instruction (prefixed with \"Write a function corresponding to the docstring:\") and then use the content prior to the docstring and the canonical solution as the response." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def get_docstring_indices(prompt_lines: List[str]) -> Tuple[int, int]:\n", + " docstring_start, docstring_end = None, None\n", + "\n", + " for i, line in enumerate(prompt_lines):\n", + " if not (line.strip().startswith('\"\"\"') or line.strip().startswith(\"'''\")):\n", + " continue\n", + " if docstring_start:\n", + " docstring_end = i\n", + " break\n", + " docstring_start = i\n", + "\n", + " if docstring_end:\n", + " return docstring_start, docstring_end\n", + " raise ValueError(f\"No complete docstring found!\\n{prompt_lines}\")\n", + "\n", + "\n", + "def get_before(prompt_lines: List[str], before: int) -> List[str]:\n", + " before_lines = prompt_lines[:before]\n", + " return before_lines\n", + "\n", + "\n", + "def get_between(prompt_lines: List[str], start: int, end: int) -> List[str]:\n", + " between_lines = prompt_lines[start:end]\n", + " return between_lines" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_request_and_solution(sample: dict) -> 
Tuple[List[str], List[str]]:\n", + " prompt = sample[\"prompt\"]\n", + " prompt_lines = prompt.splitlines()\n", + "\n", + " docstring_start, docstring_end = get_docstring_indices(prompt_lines)\n", + "\n", + " # Extract prompt\n", + " in_docstring = get_between(prompt_lines, docstring_start, docstring_end)\n", + " if '\"\"\"' in in_docstring[0] or \"'''\" in in_docstring[0]:\n", + " in_docstring[0] = in_docstring[0].replace('\"\"\"', \"\").replace(\"...\", \"\").strip()\n", + " request = \"Write a Python function corresponding to the docstring: \" + \" \".join([p.strip() for p in in_docstring])\n", + "\n", + " # Extract solution\n", + " before_docstring = get_before(prompt_lines, docstring_start)\n", + " after_docstring = sample[\"canonical_solution\"].splitlines()\n", + " solution = before_docstring + after_docstring\n", + " # Gets rid of consecutive empty lines\n", + " solution = [v for i, v in enumerate(solution) if v != \"\" or v != solution[i - 1]]\n", + " solution = \"\\n\".join(solution)\n", + "\n", + " return request, solution" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def process_file(file_path: Path, out_path: Path):\n", + " lines = file_path.read_text().splitlines()\n", + " samples = list(map(json.loads, lines))\n", + "\n", + " output = []\n", + " for sample in samples:\n", + " prompt, solution = get_request_and_solution(sample)\n", + " output.append({\"prompt\": prompt, \"solution\": solution})\n", + "\n", + " with open(out_path, \"w\") as f:\n", + " for sample in output:\n", + " f.write(json.dumps(sample))\n", + " f.write(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "for file_path, out_path in zip(FILE_PATHS, OUT_PATHS):\n", + " process_file(file_path, out_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "1f9a0efd3e4a33b8f30a65df6ca5a95cc3f93ce2f11519ee8c13fe711de61465" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/data-augmentation/codet-data/Augment_CodeT_testgen.ipynb b/notebooks/data-augmentation/codet-data/Augment_CodeT_testgen.ipynb new file mode 100644 index 00000000..0c9f1626 --- /dev/null +++ b/notebooks/data-augmentation/codet-data/Augment_CodeT_testgen.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains code to parse CodeT test case generation prompt and solution data and modify to `(prompt, solution)` pairs outputted in a `.jsonl` file.\n", + "\n", + "Requirements: `requests`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from pathlib import Path\n", + "import requests\n", + "from typing import List, Tuple" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_FILES: List[str] = [\n", + " \"HumanEval_for_test_case_generation.jsonl\",\n", + " \"mbpp_sanitized_for_test_case_generation.jsonl\",\n", + "]\n", + "\n", + "OUT_FILES: List[str] = [\n", + " \"HumanEval_testgen.jsonl\",\n", + " \"mbpp_testgen.jsonl\",\n", + "]\n", + "\n", + "FILE_PATHS: List[Path] = [Path(f\"data/{data_file}\") for data_file in DATA_FILES]\n", + "\n", + "OUT_PATHS: List[Path] = [Path(f\"data/augmented/{out_file}\") for out_file in OUT_FILES]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def download_file(filename: str):\n", + " url = 
f\"https://raw.githubusercontent.com/microsoft/CodeT/main/CodeT/data/dataset/{filename}\"\n", + " response = requests.get(url)\n", + " with open(f\"data/{filename}\", \"wb\") as f:\n", + " f.write(response.content)\n", + "\n", + "\n", + "for filename in DATA_FILES:\n", + " download_file(filename)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def get_docstring_indices(prompt_lines: List[str]) -> Tuple[int, int]:\n", + " docstring_start, docstring_end = None, None\n", + "\n", + " for i, line in enumerate(prompt_lines):\n", + " if not (line.strip().startswith('\"\"\"') or line.strip().startswith(\"'''\")):\n", + " continue\n", + " if docstring_start:\n", + " docstring_end = i\n", + " break\n", + " docstring_start = i\n", + "\n", + " if docstring_end:\n", + " return docstring_start, docstring_end\n", + " raise ValueError(f\"No complete docstring found!\\n{prompt_lines}\")\n", + "\n", + "\n", + "def get_between(prompt_lines: List[str], start: int, end: int) -> List[str]:\n", + " between_lines = prompt_lines[start:end]\n", + " return between_lines" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def get_request(sample: dict) -> List[str]:\n", + " prompt = sample[\"prompt\"]\n", + " prompt_lines = prompt.splitlines()\n", + "\n", + " docstring_start, docstring_end = get_docstring_indices(prompt_lines)\n", + "\n", + " # Extract prompt\n", + " in_docstring = get_between(prompt_lines, docstring_start, docstring_end)\n", + " if '\"\"\"' in in_docstring[0] or \"'''\" in in_docstring[0]:\n", + " in_docstring[0] = in_docstring[0].replace('\"\"\"', \"\").replace(\"...\", \"\").strip()\n", + " request = \"Write a test for a Python function with the following docstring: \" + \" \".join(\n", + " [p.strip() for p in in_docstring]\n", + " )\n", + "\n", + " return request\n", + "\n", + "\n", + "def get_test_code(sample: dict) -> List[str]:\n", + " test = 
sample[\"test\"]\n", + " test_lines = test.splitlines()\n", + " start = 0\n", + " for i, line in enumerate(test_lines):\n", + " if \"def check(\" in line:\n", + " start = i\n", + " return \"\\n\".join(test_lines[start:])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def process_file(file_path: Path, out_path: Path):\n", + " lines = file_path.read_text().splitlines()\n", + " samples = list(map(json.loads, lines))\n", + "\n", + " output = []\n", + " for sample in samples:\n", + " prompt = get_request(sample)\n", + " test = get_test_code(sample)\n", + " output.append({\"prompt\": prompt, \"solution\": test})\n", + "\n", + " with open(out_path, \"w\") as f:\n", + " for sample in output:\n", + " f.write(json.dumps(sample))\n", + " f.write(\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "for file_path, out_path in zip(FILE_PATHS, OUT_PATHS):\n", + " process_file(file_path, out_path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.5 ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "1f9a0efd3e4a33b8f30a65df6ca5a95cc3f93ce2f11519ee8c13fe711de61465" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/data-augmentation/codet-data/README.md b/notebooks/data-augmentation/codet-data/README.md new file mode 100644 index 00000000..985883b4 --- /dev/null +++ b/notebooks/data-augmentation/codet-data/README.md @@ -0,0 +1,15 @@ +# CodeT Datasets + +This folder contains two notebooks. 
+ +One will download the data used for Microsoft CodeT for tuning a model for +Python code generation from function docstrings, augment the data into prompt +and solution pairs and write them to `.jsonl` files. + +The other will download the data used for Microsoft CodeT for tuning a model for +Python test generation from corresponding function docstrings, augment the data +into prompt and solution pairs and write them to `.jsonl` files. + +## Requirements + +Both notebooks require the library `requests`. From 364790947dc39e65fcb6c0cc448e789f5b73605d Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 21:47:54 +0100 Subject: [PATCH 31/62] hacky solution to initial missing username --- website/src/components/Header/UserMenu.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/src/components/Header/UserMenu.tsx b/website/src/components/Header/UserMenu.tsx index 7a470464..99ec01f1 100644 --- a/website/src/components/Header/UserMenu.tsx +++ b/website/src/components/Header/UserMenu.tsx @@ -74,7 +74,7 @@ export function UserMenu() { - {session.user.name || session.user.email} + {session.user.name || "New User"} From 18e7eb887633ef6d8d1e56e7d2dd87a483da2b79 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 22:23:33 +0100 Subject: [PATCH 32/62] updated deploy playbook with env variables --- .github/workflows/deploy-to-node.yaml | 3 +++ ansible/deploy-to-node.yaml | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/deploy-to-node.yaml b/.github/workflows/deploy-to-node.yaml index f1b4558a..8774f7f6 100644 --- a/.github/workflows/deploy-to-node.yaml +++ b/.github/workflows/deploy-to-node.yaml @@ -37,6 +37,9 @@ jobs: S3_REGION: ${{ secrets.S3_REGION }} AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }} AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }} + MAX_ACTIVE_TREES: ${{ vars.MAX_ACTIVE_TREES }} + MAX_TREE_DEPTH: ${{ vars.MAX_TREE_DEPTH }} + GOAL_TREE_SIZE: ${{ vars.GOAL_TREE_SIZE }} steps: - 
name: Checkout uses: actions/checkout@v2 diff --git a/ansible/deploy-to-node.yaml b/ansible/deploy-to-node.yaml index f1cfe001..ec25726f 100644 --- a/ansible/deploy-to-node.yaml +++ b/ansible/deploy-to-node.yaml @@ -109,6 +109,15 @@ DEBUG_SKIP_TOXICITY_CALCULATION: "{{ 'true' if stack_name == 'dev' else 'false' }}" OFFICIAL_WEB_API_KEY: "{{ web_api_key }}" + TREE_MANAGER__MAX_ACTIVE_TREES: + "{{ lookup('ansible.builtin.env', 'MAX_ACTIVE_TREES') | + default('10', true) }}" + TREE_MANAGER__MAX_TREE_DEPTH: + "{{ lookup('ansible.builtin.env', 'MAX_TREE_DEPTH') | default('5', + true) }}" + TREE_MANAGER__GOAL_TREE_SIZE: + "{{ lookup('ansible.builtin.env', 'GOAL_TREE_SIZE') | default('15', + true) }}" ports: - "{{ backend_port }}:8080" From 51a1d9a841ab733f5813b16925b0ecd144a324f0 Mon Sep 17 00:00:00 2001 From: Yannic Kilcher Date: Tue, 17 Jan 2023 22:33:23 +0100 Subject: [PATCH 33/62] fixed white-space issue for messages that contain code --- website/src/components/Messages/MessageTableEntry.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/website/src/components/Messages/MessageTableEntry.tsx b/website/src/components/Messages/MessageTableEntry.tsx index 2af60bbe..8e9d03b6 100644 --- a/website/src/components/Messages/MessageTableEntry.tsx +++ b/website/src/components/Messages/MessageTableEntry.tsx @@ -30,7 +30,12 @@ export function MessageTableEntry(props: MessageTableEntryProps) { {props.enabled ? 
( - + {item.text} From 98f47f86bc269289dfe4380f051824a806a86116 Mon Sep 17 00:00:00 2001 From: Nil Andreu <65730003+Nil-Andreu@users.noreply.github.com> Date: Tue, 17 Jan 2023 23:50:03 +0100 Subject: [PATCH 34/62] Documentation FAQ (#581) * [NEW] FAQ module --- docs/docs/faq/README.md | 3 ++ docs/docs/faq/faq.md | 65 +++++++++++++++++++++++++++++++++++++++++ docs/sidebars.js | 9 ++++++ 3 files changed, 77 insertions(+) create mode 100644 docs/docs/faq/README.md create mode 100644 docs/docs/faq/faq.md diff --git a/docs/docs/faq/README.md b/docs/docs/faq/README.md new file mode 100644 index 00000000..6b510c97 --- /dev/null +++ b/docs/docs/faq/README.md @@ -0,0 +1,3 @@ +# Frequently Asked Questions + +This page answers some of the most frequently asked questions. diff --git a/docs/docs/faq/faq.md b/docs/docs/faq/faq.md new file mode 100644 index 00000000..0db57d30 --- /dev/null +++ b/docs/docs/faq/faq.md @@ -0,0 +1,65 @@ +### Docker-Compose instead of Docker Compose + +If you are using `docker-compose` instead of `docker compose` (note the " " +instead of the "-"), you should update your docker cli to the latest version. +`docker compose` is the most recent version and should be used instead of +`docker-compose`. + +For more details and information check out +[this SO thread](https://stackoverflow.com/questions/66514436/difference-between-docker-compose-and-docker-compose) +that explains it all in detail. + +### Pre-commit + +We are using pre-commit to ensure the quality of the code as well as the same +code standard. + +The steps that you need to follow to be able to use it are: + +```bash + # install pre-commit in your python environment + pip3 install pre-commit + + # install the pre-commit git hooks in your local repository + pre-commit install +``` + +From now on, `pre-commit` will run on the staged files each time you commit. +If it reports any errors, you will need to fix them, and then stage and commit +the changes again.
+ +### Docker Cannot Start Container: Permission Denied + +Instead of always running docker as root, you could create a +`docker` group with granted permissions (root): + +```bash + # Create a new linux group + sudo groupadd docker + + # Add the current user to the group + sudo usermod -aG docker $USER + + # Log in to the group (apply the group changes to the current terminal session) + newgrp docker +``` + +After that, you should be able to run docker: `docker run .`. If you +still cannot, you can try rebooting the machine: + +```bash + reboot +``` + +### Docker Cannot Stop Container + +If you try to shut down the services (`docker-compose down`), and you are +getting permission denied (using root user), you can try the following: + +```bash + # Restart docker daemon + sudo systemctl restart docker.socket docker.service + + # And remove the container + docker rm -f +``` diff --git a/docs/sidebars.js b/docs/sidebars.js index 2f7baedf..83063239 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -70,6 +70,15 @@ const sidebars = { }, items: ["presentations/list"], }, + { + type: "category", + label: "FAQ", + link: { + type: "doc", + id: "faq/README", + }, + items: ["faq/faq"], + }, ], }; From acaa56ebb004eddf257af526df5e529bf11b5c3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Wed, 18 Jan 2023 00:49:28 +0100 Subject: [PATCH 35/62] fix ranking assert for 1st level children --- backend/oasst_backend/tree_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index 2f48bca0..a837071a 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -267,7 +267,7 @@ class TreeManager: ranking_parent_id = random.choice(incomplete_rankings).parent_id messages = self.pr.fetch_message_conversation(ranking_parent_id) - assert len(messages) > 1 and messages[-1].id == ranking_parent_id + assert len(messages) > 0
and messages[-1].id == ranking_parent_id ranking_parent = messages[-1] assert not ranking_parent.deleted and ranking_parent.review_result conversation = prepare_conversation(messages) From 718faa098e8f1d5fa3c677715334a8c217d9a602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20K=C3=B6pf?= Date: Wed, 18 Jan 2023 00:54:19 +0100 Subject: [PATCH 36/62] Add combined TreeManager stats endpoint (#816) --- backend/oasst_backend/api/v1/stats.py | 32 +++++++++++++++ backend/oasst_backend/tree_manager.py | 56 ++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/backend/oasst_backend/api/v1/stats.py b/backend/oasst_backend/api/v1/stats.py index 1aaffb1b..f6bbbdf3 100644 --- a/backend/oasst_backend/api/v1/stats.py +++ b/backend/oasst_backend/api/v1/stats.py @@ -2,6 +2,7 @@ from fastapi import APIRouter, Depends from oasst_backend.api import deps from oasst_backend.models import ApiClient from oasst_backend.prompt_repository import PromptRepository +from oasst_backend.tree_manager import TreeManager, TreeManagerStats, TreeMessageCountStats from oasst_shared.schemas import protocol from sqlmodel import Session @@ -15,3 +16,34 @@ def get_message_stats( ): pr = PromptRepository(db, api_client) return pr.get_stats() + + +@router.get("/tree_manager/state_counts", response_model=dict[str, int]) +def get_tree_manager__state_counts( + db: Session = Depends(deps.get_db), + api_client: ApiClient = Depends(deps.get_trusted_api_client), +): + pr = PromptRepository(db, api_client) + tm = TreeManager(db, pr) + return tm.tree_counts_by_state() + + +@router.get("/tree_manager/message_counts", response_model=list[TreeMessageCountStats]) +def get_tree_manager__message_counts( + only_active: bool = True, + db: Session = Depends(deps.get_db), + api_client: ApiClient = Depends(deps.get_trusted_api_client), +): + pr = PromptRepository(db, api_client) + tm = TreeManager(db, pr) + return tm.tree_message_count_stats(only_active=only_active) + + 
+@router.get("/tree_manager", response_model=TreeManagerStats) +def get_tree_manager__stats( + db: Session = Depends(deps.get_db), + api_client: ApiClient = Depends(deps.get_trusted_api_client), +): + pr = PromptRepository(db, api_client) + tm = TreeManager(db, pr) + return tm.stats() diff --git a/backend/oasst_backend/tree_manager.py b/backend/oasst_backend/tree_manager.py index a837071a..64484868 100644 --- a/backend/oasst_backend/tree_manager.py +++ b/backend/oasst_backend/tree_manager.py @@ -1,4 +1,5 @@ import random +from datetime import datetime from enum import Enum from http import HTTPStatus from typing import Any, Dict, List, Optional, Tuple @@ -69,6 +70,25 @@ class IncompleteRankingsRow(pydantic.BaseModel): orm_mode = True +class TreeMessageCountStats(pydantic.BaseModel): + message_tree_id: UUID + state: str + depth: int + oldest: datetime + youngest: datetime + count: int + goal_tree_size: int + + @property + def completed(self) -> int: + return self.count / self.goal_tree_size + + +class TreeManagerStats(pydantic.BaseModel): + state_counts: dict[str, int] + message_counts: list[TreeMessageCountStats] + + class TreeManager: _all_text_labels = list(map(lambda x: x.value, protocol_schema.TextLabel)) @@ -924,6 +944,40 @@ INNER JOIN message_reaction mr ON mr.task_id = t.id AND mr.payload_type = 'Ranki active=True, ) + def tree_counts_by_state(self) -> dict[str, int]: + qry = self.db.query( + MessageTreeState.state, func.count(MessageTreeState.message_tree_id).label("count") + ).group_by(MessageTreeState.state) + return {x["state"]: x["count"] for x in qry} + + def tree_message_count_stats(self, only_active: bool = True) -> list[TreeMessageCountStats]: + qry = ( + self.db.query( + MessageTreeState.message_tree_id, + func.max(Message.depth).label("depth"), + func.min(Message.created_date).label("oldest"), + func.max(Message.created_date).label("youngest"), + func.count(Message.id).label("count"), + MessageTreeState.goal_tree_size, + MessageTreeState.state, + 
) + .select_from(MessageTreeState) + .join(Message, MessageTreeState.message_tree_id == Message.message_tree_id) + .filter(not_(Message.deleted)) + .group_by(MessageTreeState.message_tree_id) + ) + + if only_active: + qry.filter(MessageTreeState.active) + + return [TreeMessageCountStats(**x) for x in qry] + + def stats(self) -> TreeManagerStats: + return TreeManagerStats( + state_counts=self.tree_counts_by_state(), + message_counts=self.tree_message_count_stats(only_active=True), + ) + if __name__ == "__main__": from oasst_backend.api.deps import api_auth @@ -942,7 +996,7 @@ if __name__ == "__main__": # print("query_num_active_trees", tm.query_num_active_trees()) # print("query_incomplete_rankings", tm.query_incomplete_rankings()) - print("query_replies_need_review", tm.query_replies_need_review()) + # print("query_replies_need_review", tm.query_replies_need_review()) # print("query_incomplete_initial_prompt_reviews", tm.query_prompts_need_review()) # print("query_extendible_trees", tm.query_extendible_trees()) # print("query_extendible_parents", tm.query_extendible_parents()) From 0c1bb8df05bc3e6c47da861c897f5af0de8cc998 Mon Sep 17 00:00:00 2001 From: rsandb Date: Tue, 17 Jan 2023 21:14:10 -0600 Subject: [PATCH 37/62] update emptystate to be used in specific pages --- website/src/components/EmptyState.tsx | 15 --------------- website/src/pages/404.tsx | 7 +++---- website/src/pages/500.tsx | 17 ++++++++++++----- website/src/pages/tasks/random.tsx | 5 +++-- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/website/src/components/EmptyState.tsx b/website/src/components/EmptyState.tsx index 825dc610..f6245204 100644 --- a/website/src/components/EmptyState.tsx +++ b/website/src/components/EmptyState.tsx @@ -1,6 +1,5 @@ import { Box, Link, Text, useColorModeValue } from "@chakra-ui/react"; import { useRouter } from "next/router"; -import { FiAlertTriangle } from "react-icons/fi"; import { IconType } from "react-icons/lib"; type EmptyStateProps = { @@ 
-24,17 +23,3 @@ export const EmptyState = (props: EmptyStateProps) => { ); }; - -export const TaskEmptyState = () => { - return ; -}; - -export const PageEmptyState = () => { - return ; -}; - -export const ServerEmptyState = () => { - return ( - - ); -}; diff --git a/website/src/pages/404.tsx b/website/src/pages/404.tsx index f4c09bbf..afe1d080 100644 --- a/website/src/pages/404.tsx +++ b/website/src/pages/404.tsx @@ -1,8 +1,7 @@ -import { Box, Button, Center, Link, Text, useColorModeValue } from "@chakra-ui/react"; +import { Box, Button, Center, Link, Text } from "@chakra-ui/react"; import Head from "next/head"; -import { useRouter } from "next/router"; import { FiAlertTriangle } from "react-icons/fi"; -import { PageEmptyState } from "src/components/EmptyState"; +import { EmptyState } from "src/components/EmptyState"; import { getTransparentHeaderLayout } from "src/components/Layout"; function Error() { @@ -13,7 +12,7 @@ function Error() {
- + If you were trying to contribute data but ended up here, please file a bug.