From 1750cecb54cb8691e26d6b26a9861e9236ddd7a7 Mon Sep 17 00:00:00 2001 From: markcheeky <10684818+markcheeky@users.noreply.github.com> Date: Sun, 1 Jan 2023 13:18:15 +0100 Subject: [PATCH 01/34] Create docs/supervised_datasets.md, suggested by yk in issue 186 --- docs/supervised_datasets.md | 59 +++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 docs/supervised_datasets.md diff --git a/docs/supervised_datasets.md b/docs/supervised_datasets.md new file mode 100644 index 00000000..38d4ba2c --- /dev/null +++ b/docs/supervised_datasets.md @@ -0,0 +1,59 @@ +# Supervised datasets + +For discussion about usage of supervised data see issue . + + +## Motivation + +An important part of making the assistant useful is to teach it to understand and follow instructions, and to perform a large set of tasks well. + +While RLHF seems like the main ingredient, using existing supervised data might help. + +There are two large-scale projects in the area of instruction-following / multitask learning: Promptsource and Natural Instructions - +these projects crowdsourced templates and turned existing NLP datasets into instruction-following seq2seq form in natural language. +They include both long-output training examples like generating a sentence that is a likely consequence of the sentence in the prompt, and +short-output, like rating prediction from review. (Pre-)training on such datasets should help the model understand and follow instructions +and teach it many abilities necessary to perform a large set of tasks correctly. However, these data are not dialog-like - they do not +look like a normal conversation. + +There are also supervised dialog datasets such as Blended Skill Talk or SODA. In contrast to instruction-following datasets, dialog data +is not as focused on "academic tasks" or correctness, but encourages the model to respond naturally like a person would. 
+ +### Promptsource +- GitHub: +- paper: [Multitask Prompted Training Enables Zero-Shot Task Generalization](https://arxiv.org/abs/2110.08207) +- project for preparing templates and working with them +- they generated a dataset using the templates: + - + - (with multilingual data but English prompt) + - (with multilingual data and machine-translated prompt) +- they trained zero-shot models (= models for following instructions in the input) + - based on T5 architecture (encoder-decoder) called T0 family (and MT0 for multilingual) + - and based on GPT architecture (decoder-only) called BloomZ family + - Huggingface demo: [T0](https://huggingface.co/bigscience/T0pp), [MT0](https://huggingface.co/bigscience/mt0-large), [BloomZ](https://huggingface.co/bigscience/bloomz), + - GitHub repo for T0: + - GitHub repo for BloomZ and MT0: + + +### Natural instructions +- GitHub: +- paper: [Super-NaturalInstructions: Generalization via Declarative Instructions on 1600+ NLP Tasks](https://arxiv.org/abs/2204.07705) +- they crowdsource directly the data prepared for instruction following (and learning from a few examples) +- the GitHub repo = the dataset. 
It contains jsons +- they trained zero-shot and in-context few-shot models (in multiple sizes): + - mT5 architecture (encoder-decoder, multilingual pretraining) + - Huggingface demo few-shot: + - Huggingface demo zero-shot: + + +### Blended Skill Talk +- used by Facebook in Blenderbot project +- HuggingFace dataset: +- example model trained on it: + + +### SODA +- GitHub: +- paper: + + From 53ab8fb479d6b4773d0f5fe608451a98c3a5fa45 Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Mon, 2 Jan 2023 14:21:39 +0900 Subject: [PATCH 02/34] Adding a bare bones unstyled terms of service and privacy policy --- website/src/pages/privacy-policy.tsx | 414 +++++++++++++++++++++++++ website/src/pages/terms-of-service.tsx | 159 ++++++++++ 2 files changed, 573 insertions(+) create mode 100644 website/src/pages/privacy-policy.tsx create mode 100644 website/src/pages/terms-of-service.tsx diff --git a/website/src/pages/privacy-policy.tsx b/website/src/pages/privacy-policy.tsx new file mode 100644 index 00000000..42f439fc --- /dev/null +++ b/website/src/pages/privacy-policy.tsx @@ -0,0 +1,414 @@ +import { Container, Heading } from "@chakra-ui/react"; +import Head from "next/head"; +import { Footer } from "src/components/Footer"; +import { Header } from "src/components/Header"; + +const PrivacyPolicy = () => { + return ( + <> + + Open Assistant Privacy Policy + + +
+ + + Privacy Policy + + Overview + + + We are pleased that you are interested in our work and welcome you to our website laion.ai. In this Privacy + Policy you will learn which personal data we process when you visit our website and to what kind of purpose, + and also what rights you have regarding these data. Categorically, we only store data as long as we need + them. There is no legal obligation to provide us with personal data. Automated decision-making, as per + Article 22 of the EU-GDPR, will not happen. + + + 1. Definitions + + + We are required by law that personal data are processed lawfully, in good faith, and in a manner that can be + comprehended by the persons who are affected (“lawfulness, fair processing, transparency”). To this end, we + hereby inform you about the individual legal definitions of the European General Data Protection Regulation + (GDPR) and the new German Federal Data Protection Act, which are also used in these data privacy + regulations. + + + + 1.1 Personal data + + + + 'Personal data' means any information relating to an identified or identifiable natural person + (hereinafter the 'data subject'). A natural person is considered to be identifiable if he or she + can be identified directly or indirectly, in particular by association with an identifier such as a name, an + identification number, location data, an online identifier, or one or more special features which express + the physical, physiological, genetic, mental, economic, cultural or social identity of the natural person. + + + + 1.2 Restriction of processing + + + + 'Restriction of processing' means the marking of stored personal data with the aim of limiting its + processing in the future. 
+ + + + 1.3 Profiling + + + + 'Profiling' means any form of automated processing of personal data consisting of the use of + personal data to evaluate certain personal aspects relating to a natural person, in particular to analyse or + predict aspects concerning that natural person's performance at work, economic situation, health, + personal preferences, interests, reliability, behaviour, location or movements. + + + + 1.4 Pseudonymization + + + + 'Pseudonymization' means the processing of personal data in such a manner that the personal data + can no longer be attributed to a specific data subject without the use of additional information, provided + that such additional information is kept separately and is subject to technical and organizational measures + to ensure that the personal data is not attributed to an identified or identifiable natural person + + + + 1.5 Filing system + + + + 'Filing system' means any structured set of personal data which is accessible according to + specific criteria, whether centralized, decentralized or dispersed on a functional or geographical basis. + + + + 1.6 Controller + + + + 'Controller' means the natural or legal person, public authority, agency or other body which, + alone or jointly with others, determines the purposes and means of the processing of personal data. Where + the purposes and means of such processing are determined by European Union or Member State law, the + controller or the specific criteria for its nomination may be provided for by European Union or Member State + law. + + + + 1.7 Processor + + + + 'Processor' means a natural or legal person, public authority, agency or other body which + processes personal data on behalf of the controller. + + + + 1.8 Recipient + + + + 'Recipient' means a natural or legal person, public authority, agency or another body, to which + the personal data is disclosed, whether a third party or not. 
However, public authorities which may receive + potentially personal data in the framework of a particular inquiry in accordance with European Union or + Member State law shall not be regarded as recipients. The processing of that data by those public + authorities shall be in compliance with the applicable data protection rules according to the purposes of + the processing. + + + + 1.9 Third party + + + + A 'third party' means a natural or legal person, public authority, agency or body other than the + data subject, controller, processor and persons who, under the direct authority of the controller or + processor, are authorized to process personal data. + + + 2. Responsible controller + + Responsible controller is: LAION e.V., Marie-Henning-Weg 143, 21035 Hamburg, Germany + + 3. Data we collect + + Open Assistant tracks data in the following conditions + + + 3.1 Using the Discord Bot + + + + When using the Open Assistant Discord bot, we privately track and store the unique Discord ID of the user + submitting responses. Each submitted response is associated with the user’s Discord ID. + + + + 3.2 Using the Website + + + + When a user registers an account with the website we privately track and store either the unique Discord ID + of the user or the unique Email of the registered user. When a user submits responses we store: +
    +
  1. When registered using Discord, we associate the unique Discord ID with each submitted response
  2. +
  3. When registered using Email, we associate a unique pseudonymous ID with each submitted response
  4. +
+
+ 4. Inquiries + + + When you contact us via e-mail, telephone or telefax, your inquiry, including all personal data arising + thereof, will be stored by us for the purpose of processing your request. We will not pass on these data + without your consent. The processing of these data is based on Article 6 (1) (1) (b) GDPR, if your inquiry + is related to the fulfilment of a contract concluded with us or required for the implementation of + pre-contractual measures. Furthermore, the processing is based on Article 6 (1) (1) (f) GDPR, because we + have a legitimate interest in the effective handling of requests sent to us. In addition, according to + Article 6 (1) (1) (c) GDPR we are also entitled to the processing of the above-mentioned data, because we + are legally bound to enable fast electronic contact and immediate communication. Of course, your data will + only be used strictly according to purpose and only for processing and responding to your request. After + final processing, your data will immediately be anonymized or deleted, unless we are bound by a legally + prescribed storage period. + + + 5. Processors + + + In principle, we will never pass on your personal data to third parties without your explicit consent. + However, just like every modern business, we cooperate with data processors in order to be able to offer you + the best possible uninterrupted service. When we cooperate with external service providers, regular order + processing is performed, based on Article 28 GDPR. For this purpose, we enter into respective agreements + with our partners, in order to safeguard the protection of your data. For processing your data, we only use + carefully selected processors. They are bound by our instructions, and regularly controlled by us. We only + commission external service providers who have guaranteed that all data processing procedures are performed + in unison with data protection regulations. 
Receivers of personal data may be: Hosting companies and Hosting + service providers + + + 6. Children and young people + + + In principle, our offer is directed towards adults. Children and young people under the age of 16 are not + allowed to transmit personal data to us without the consent of their parents or legal guardians. + + + 7. Your rights + + + If your personal data is processed on the basis of consent which you have given us, you have the right to + revoke your consent at any time. The revocation of consent does not affect the legality of the processing + performed on the basis of the consent until the time of revocation. You can contact us at any time to + exercise your right to revoke consent. + + + + 7.2 Right to confirmation + + + + You have the right to request confirmation from the controller that we are processing personal data + concerning you. You can request this confirmation at any time using the contact details above. + + + + 7.3 Right to information + + + + In the event that personal data is processed, you can request information about this personal data and the + following information at any time: the purposes of the processing, the categories of personal data being + processed, the recipients or categories of recipients to whom the personal data has been or is being + disclosed, in particular in the case of recipients in third countries or international organizations, if + possible, the planned duration for which the personal data is stored or, if this is not possible, the + criteria for determining this duration, the existence of a right to rectification or erasure of the personal + data concerning you, or to a restriction of processing by the controller or a right to object to such + processing, the existence of a right to lodge a complaint with a supervisory authority, if the personal data + is not collected from the data subject, all available information on the source of the data, the existence + of automated decision-making, 
including profiling, in accordance with Article 22 (1) and (4) GDPR and, at + least in these cases, meaningful information about the logic involved and the scope and intended impact of + such processing on the data subject. If personal data is transferred to a third country or to an + international organization, you have the right to be informed of the appropriate safeguards under Article 46 + of the GDPR in connection with the transfer. We provide a copy of the personal data that is the subject of + the processing. For any additional copies you request of a person, we may charge a reasonable fee based on + our administrative costs. If your request is submitted electronically, the information must be provided in a + standard electronic format, unless otherwise stated. The right to receive a copy under paragraph 3 shall not + affect the rights and freedoms of others. + + + + 7.4 Right to rectification + + + + You have the right to demand the immediate correction of incorrect personal data concerning you. Taking into + account the purposes of processing, you have the right to request the completion of incomplete personal + data, including by means of a supplementary statement.{" "} + + + + 7.4 Right to rectification + + + + You have the right to demand the immediate correction of incorrect personal data concerning you. Taking into + account the purposes of processing, you have the right to request the completion of incomplete personal + data, including by means of a supplementary statement. 
+ + + + 7.5 Right to erasure (“right to be forgotten“) + + + + You have the right to demand that the controller erase personal data concerning you without undue delay, and + we are obligated to erase personal data without undue delay where one of the following grounds applies: the + personal data are no longer necessary in relation to the purposes for which they were collected or otherwise + processed, the data subject withdraws the consent on which the processing is based according to point (a) of + Article 6(1), or point (a) of Article 9(2), and there is no other legal ground for the processing, the data + subject objects to the processing pursuant to Article 21(1) GDPR and there are no overriding legitimate + grounds for the processing, or the data subject objects to the processing pursuant to Article 21(2) GDPR, + the personal data have been unlawfully processed, personal data must be erased for compliance with a legal + obligation in Union or Member State law to which the controller is subject, the personal data was collected + in relation to the offer of information society services referred to in Article 8(1) GDPR. If the controller + has made the personal data public and is obliged pursuant to paragraph 1 to erase the personal data, the + controller, taking account of available technology and the cost of implementation, shall take reasonable + steps, including technical measures, to inform controllers which are processing the personal data that the + data subject has requested the erasure by such controllers of any links to, or copy or replication of, that + personal data. 
The right to erasure (“right to be forgotten”) does not apply to the extent that the + processing is necessary: to exercise the right of freedom of expression and information, for compliance with + a legal obligation which requires processing by Union or Member State law to which the controller is + subject or for the performance of a task carried out in the public interest or in the exercise of official + authority vested in the controller, for reasons of public interest in the area of public health in + accordance with points (h) and (i) of Article 9(2) as well as Article 9(3) GDPR, for archiving purposes in + the public interest, scientific or historical research purposes or statistical purposes in accordance with + Article 89(1) GDPR in so far as the right referred to in paragraph 1 is likely to render impossible or + seriously impair the achievement of the objectives of that processing; or for the establishment, exercise or + defense of legal claims. + + + + 7.6 Right to restriction of processing + + + + You have the right to request that we restrict the processing of your personal data if any of the following + conditions apply: the accuracy of the personal data is contested by the data subject, for a period enabling + the controller to verify the accuracy of the personal data, the processing is unlawful and the data subject + opposes the erasure of the personal data and requests the restriction of their use instead, the controller + no longer needs the personal data for the purposes of the processing, but the data is required by the data + subject for the establishment, exercise or defense of legal claims, or the data subject has objected to + processing pursuant to Article 21(1) GDPR pending the verification whether the legitimate grounds of the + controller override those of the data subject. In the event that processing has been restricted under the + aforementioned conditions, this personal data shall – with the exception of storage – only be processed 
with + the data subject’s consent or for the establishment, exercise or defense of legal claims or for the + protection of the rights of another natural or legal person or for reasons of important public interest of + the Union or of a Member State. In order to exercise the right to restrict processing, the data subject may + contact us at any time using the contact details provided above. + + + + 7.7 Right to data portability + + + + You have the right to receive the personal data concerning you which you have provided to us in a + structured, commonly used and machine-readable format and have the right to transmit that data to another + controller without hindrance from the controller to which the personal data have been provided, to the + extent that: the processing is based on consent pursuant to point (a) of Article 6 (1) or point (a) of + Article 9 (2) or on a contract pursuant to point (b) of Article 6 (1) GDPR and the processing is carried out + by automated means. In exercising your right to data portability pursuant to paragraph 1, you have the right + to have the personal data transmitted directly from one controller to another, to the extent that this is + technically feasible. The exercise of the right to data portability does not affect your right to erasure + (“right to be forgotten”). That right shall not apply to processing necessary for the performance of a task + carried out in the public interest or in the exercise of official authority vested in the controller. + + + + 7.8 Right to object + + + + You have the right to object, on grounds relating to your particular situation, at any time to processing of + personal data which concerns you which is based on point (e) or (f) of Article 6 (1) GDPR, including + profiling based on those provisions. 
If objection is made, the controller will no longer process the + personal data unless the controller demonstrates compelling legitimate grounds for the processing which + override the interests, rights and freedoms of the data subject or for the establishment, exercise or + defense of legal claims. In the event that personal data is processed for direct marketing purposes, you + have the right to object at any time to processing of personal data concerning you for such marketing. This + also applies to profiling to the extent that it is related to such direct marketing. If you object to + processing for direct marketing purposes, your personal data shall no longer be processed for such purposes. + Regarding the use of information society services, and notwithstanding Directive 2002/58/EC, you can + exercise your right to object by automated means using technical specifications. Where personal data are + processed for scientific or historical research purposes or statistical purposes pursuant to Article 89 (1), + you, on grounds relating to your particular situation, have the right to object to processing of personal + data concerning you, unless the processing is necessary for the performance of a task carried out for + reasons of public interest. The right of objection can be exercised at any time by contacting the respective + controller. + + + + 7.9 Automated individual decision-making, including profiling + + + + You have the right not to be subject to a decision based solely on automated processing, including + profiling, which produces legal effects for you or similarly significantly affects you. 
This does not apply + if the decision: is necessary for entering into, or performance of, a contract between the data subject and + a data controller, is authorized by Union or Member State law to which the controller is subject and which + also lays down suitable measures to safeguard the data subject’s rights and freedoms and legitimate + interests, or is based on the data subject’s explicit consent. The controller shall implement suitable + measures to safeguard the data subject’s rights and freedoms and legitimate interests, at least the right to + obtain human intervention on the part of the controller, to express his or her point of view and to contest + the decision. This right can be exercised by the data subject at any time by contacting the respective + controller. + + + + 7.10 Right to lodge a complaint with a supervisory authority + + + + You also have the right, without prejudice to any other administrative or judicial remedy, to lodge a + complaint with a supervisory authority, in particular in the Member State of your habitual residence, place + of work or place of the alleged infringement if you as data subject consider that the processing of personal + data relating to you infringes this Regulation. + + + + 7.11 Right to effective judicial remedy + + + + Without prejudice to any other available administrative or judicial remedy, including the right to lodge a + complaint with a supervisory authority pursuant to Article 77 GDPR, you have the right to an effective + judicial remedy if you consider that your rights under this Regulation have been infringed as a result of + the processing of your personal data in breach of this Regulation. + + + Submitting requests + + Email privacy@open-assistant.io + +
+
+ + ); +}; + +PrivacyPolicy.getLayout = (page) => ( +
+
+ {page} +
+
+); + +export default PrivacyPolicy; diff --git a/website/src/pages/terms-of-service.tsx b/website/src/pages/terms-of-service.tsx new file mode 100644 index 00000000..b2d668a5 --- /dev/null +++ b/website/src/pages/terms-of-service.tsx @@ -0,0 +1,159 @@ +import { Container, Heading } from "@chakra-ui/react"; +import Head from "next/head"; +import { Footer } from "src/components/Footer"; +import { Header } from "src/components/Header"; + +const TermsOfService = () => { + return ( + <> + + Open Assistant Terms of Service + + +
+ + + Terms Of Service + + 1. Scope of application, amendments + + 1.1. LAION (association in formation), Marie-Henning-Weg 143, 21035 Hamburg (hereinafter referred to as: + "LAION") operates an online portal for producing a machine learning model called Open + Assistant using crowdsourced data. + + + 1.2. The present terms of use regulate the user relationship between the users of the portal and LAION. + + + 1.3. LAION reserves the right to amend these Terms of Use at any time, also with regard to persons already + registered, if this becomes necessary due to changes in the law, changes in jurisdiction, changes in + economic circumstances or gaps in these Terms of Use that subsequently become apparent. The user will be + informed of such changes in good time by e-mail. The user has the opportunity to object to the changes within + 14 days of receipt of this e-mail. If the user does not object to the changes and continues to use the + portal after expiry of the objection period, the changes shall be deemed to have been agreed effectively + from the expiry of the period. If the user objects to the changes within the two-week period, LAION shall be + entitled to exclude the user from using the portal. The user shall be informed of these effects once again + in the e-mail. + + 2. Subject of use, availability of the service + + 2.1. The portal serves as a platform for creating data to train an interactive agent for scientific + purposes. All text and prompts generated through the service are used for scientific purposes, in particular + for the optimization of the AI. + + + 2.2. The input of texts on the portal and the subsequent generation of text by the artificial intelligence + provided by the portal do not give rise to any works protected by copyright. The user who has entered the + text for the generation of the text shall have neither the exclusive rights of use nor any rights of an + author to the generated text. + + + 2.3. 
LAION shall endeavour to ensure that the portal can be used as uninterruptedly as possible. However, + there shall be no legal claim to the use of the portal. LAION reserves the right, at its own discretion, to + change the portal at any time and without notice, to discontinue its operation or to exclude individual + users from using it. Furthermore, it cannot be ruled out that temporary restrictions or interruptions may + occur due to technical faults (such as interruption of the power supply, hardware and software errors, + technical problems in the data lines). + + 3. User obligations + + 3.1. The user may only use the portal for the intended purposes. In particular, he/she may not misuse the + portal. The user undertakes to refrain from generating text that violates criminal law, youth protection + regulations or the applicable laws of the following countries: Federal Republic of Germany, United States of + America (USA), Great Britain, user's place of residence. In particular, it is prohibited to enter texts + that lead to the creation of pornographic, violence-glorifying or paedosexual content and/or content that + violates the personal rights of third parties. LAION reserves the right to file a criminal complaint with + the competent authorities in the event of violations. + + + 3.2. The user undertakes not to use any programs, algorithms or other software in connection with the use of + the portal which could interfere with the functioning of the portal. Furthermore, the user shall not take + any measures that may result in an unreasonable or excessive load on the infrastructure of the portal or may + interfere with it in a disruptive manner. + + + 3.3. If a user notices obvious errors in the portal which could lead to misuse of the portal or the contents + contained therein, the user shall be obliged to report the error to LAION without delay. + + + 3.4. 
The use, distribution, storage, forwarding, editing and/or other use of images that violate these terms + of use is prohibited. + + 4. Liability + + 4.1. LAION accepts no liability for the accuracy, completeness, reliability, up-to-dateness and usability of + the content. + + + 4.2. LAION shall be liable without limitation for intent and gross negligence. In the case of simple + negligence, LAION shall only be liable for damage resulting from injury to life, limb or health or an + essential contractual obligation (obligation the fulfillment of which makes the proper performance of the + contract possible in the first place and on the observance of which the contractual partner regularly trusts + and may trust). + + + 4.3. In the event of a breach of material contractual obligations due to simple negligence, the liability of + LAION shall be limited to the amount of the foreseeable, typically occurring damage. In all other respects + liability shall be excluded. + + + 4.4. The above limitations of liability shall also apply in favour of the legal representatives and + vicarious agents of LAION. + + + 4.5. LAION shall not be liable for the loss of data of the user. The user shall be solely responsible for + the secure storage of his/her data. + + + 4.6 LAION shall not be liable for any damages incurred by the user as a result of the violation of these + terms of use. + + + 4.7 LAION shall not be liable for the use of content generated on the portal by text input outside the + portal. In particular, LAION shall not be liable for any damages incurred by the user due to the assumption + of copyrights or exclusive rights of use. + + 5. Data protection + + 5.1. LAION processes the personal data of users in accordance with the provisions of data protection law. + Detailed information can be found in the privacy policy, available at: /privacy-policy. 
+ + + 5.2 The user expressly agrees that communication within the scope of and for the purpose of the user + relationship between him/her and LAION may also take place via unencrypted e-mails. The user is aware that + unencrypted e-mails only offer limited security and confidentiality. + + 6. Final provisions + + 6.1 The contractual relationship shall be governed exclusively by the law of the Federal Republic of Germany + to the exclusion of the UN Convention on Contracts for the International Sale of Goods. + + + 6.2 Should individual provisions of these GTC including this provision be or become invalid in whole or in + part, the validity of the remaining provisions shall remain unaffected. The invalid or missing provisions + shall be replaced by the respective statutory provisions. + + + 6.3 If the customer is a merchant, a legal entity under public law or a special fund under public law, the + place of jurisdiction for all disputes arising from and in connection with contracts concluded under these + terms of use shall be the registered office of LAION. + + Status: 1st January 2023 + +
+ + ); +}; + +TermsOfService.getLayout = (page) => ( +
+
+ {page} +
+
+); + +export default TermsOfService; From 19502cf8bf005792d2b92883a5e7ff105efdc478 Mon Sep 17 00:00:00 2001 From: chandru Date: Mon, 2 Jan 2023 19:34:37 +0530 Subject: [PATCH 03/34] added clear list for more understanding of result --- notebooks/data-argumentation/EssayRevision.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/data-argumentation/EssayRevision.ipynb b/notebooks/data-argumentation/EssayRevision.ipynb index 10d170ae..bcd13d45 100644 --- a/notebooks/data-argumentation/EssayRevision.ipynb +++ b/notebooks/data-argumentation/EssayRevision.ipynb @@ -1 +1 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyO8HHo9/NuZY8QnCvjrXaYb"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["#Essay Revision\n","The goal of this notebook is to use data argumentation to have data on improving essays. The way this is done is by taking a template \"good\" essay and making step by step changes that make it worse and add intructions on how to fix it."],"metadata":{"id":"o0lAqmWhsiUe"}},{"cell_type":"code","source":["import nltk\n","nltk.download('wordnet')\n","nltk.download('omw-1.4')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"AFUIjc7xw25A","executionInfo":{"status":"ok","timestamp":1672489678465,"user_tz":-60,"elapsed":240,"user":{"displayName":"Graverman","userId":"06659155231973912985"}},"outputId":"01c13cd7-7252-4948-fd9a-f36919f2214b"},"execution_count":35,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":35}]},{"cell_type":"markdown","source":["Put your essay here, [source of the essay used 
](https://www.thewisdompost.com/essay/technology-essay/3387#essay-on-technology-for-college-and-university-students-essay-2-750-words)"],"metadata":{"id":"EcDYv9cnv18v"}},{"cell_type":"code","source":["essay = \"\"\"\n","We live in a world driven by technology — hardly anyone would argue with you if you said this. Technology, literally meaning the “science of craft”, refers to the collection of techniques, skills, methods, and processes used to produce goods or services or for accomplishing objectives such as scientific investigation. Technology can be embedded in machines enabling them to be used by people even without a detailed knowledge of their inner workings.\n","Technological growth is closely linked to the expansion of scientific research and knowledge. In the last 50 years, thanks to the exponential increases in computing power and microchip design and manufacture, there has been unprecedented innovation and technological growth in nearly every field of human endeavour from health and transport to industrial production and education.\n","\n","It is automotive technology that drives today’s electric and hybrid cars, and which will drive tomorrow’s driverless cars, hover-taxis and space cabs.\n","It is technology that drives the ubiquitous mobile phones that you will now find in the hands of even the poorest of the world’s poor. It is technology that creates hybrid seeds that resist inhospitable climatic conditions and difficult terrain, giving high yields in shorter times.\n","It is advancing medical technology that makes remote surgery, minimally invasive surgery and life-saving cures using stem cell transplants. Technology puts spacecrafts on asteroids and distant planets and lets us see new worlds. 
Technology splits atoms, revealing their secrets, and gives us ways to exploit them to create energy, quantum storage for data, and virtual reality games.\n","\n","There are people who strongly oppose technology and claim that it spells the death of ‘humanity’, and that we are approaching the day when machines will rule everything. They refer to fans of technology as ‘techies’ or sometimes ‘geeks’. On the other hand, proponents of technology call these people Luddites, a derogatory name for someone who is opposed to industrialisation, automation, computerisation and new technologies in general.\n","Is this true? Is technology really a curse disguised as a blessing? Many believe that the convergence of biotechnology and AI might be the most consequential development of all.\n","\n","In the last five decades, two areas in particular have grown faster than the rest, powered by research and advances in computing power. One is artificial intelligence, or AI; the other is biotechnology. Huge benefits have emerged from each of them for human beings in general, such as self-driving cars — which will dramatically reduce the death rate from road accidents — and robotic surgery, which enables precise, highly efficient and targeted surgical interventions.\n","Yet, visionaries like Yuval Noah Harari, author of the best-selling Homo sapiens and Deus, are now warning that the convergence of biotechnology and AI will irreversibly and unpredictably change both the quality of human life and its challenges in the next few decades. A good example of this is the facial recognition technology that is now present in all photo management programs. The AI in the software is capable of not only spotting the faces in every photograph but also recognising the person by name.\n","This technology has now expanded so that photo apps can recognise cats, dogs, beaches, mountains and cars too. 
Computers with AI are already correctly identifying human emotions through observing facial expressions and body movements. Some robots are able to mimic human emotions. This is called affective computing, sometimes called artificial emotional intelligence, and refers to the study and development of systems and devices that can recognize, interpret, process, and simulate human affects.\n","\n","The ability to read human emotions is just a step away from predicting human emotions. For example, if a computer attached to a video camera could identify which products a consumer is showing greater interest in or which ones he is really keen to buy, various tactics could be used to influence her to buy it.\n","Activists worry that computers that can understand and anticipate human wishes and desires by scanning their irises and analysing their micro-expressions could also be programmed to exploit and manipulate them.\n","Another very real fear is that humanoid computers with human-like skin, speech, and expressions could jeopardise and dehumanise relationship and create emotional vacuums.\n","\n","An enduring fear of Luddites has always been that computers will rob humans of their livelihood by taking their jobs and doing them more efficiently at lower cost. However, in reality the exact opposite has happened. As computerised machines began taking over mechanical and repetitive human activities, new jobs for people opened up that needs thinking and analytical skills and judgement, or human interpersonal skills. A good example is the worldwide proliferation of call centres.\n","When drones were invented many feared that pilots would soon be redundant. However, few people know that it takes almost 30 people to fly one military drone, and an additional 50 people to analyze and make sense of the data being streamed back by the drone.\n","The US army suffers from a serious shortage of trained, high quality drone pilots; anyone who masters this skill will have a job. 
But a social scientist warns that in 10 years, it is certain that computers will be flying that drone and humans will be redundant. Equally sure is that some brand new skill requirement will have opened up with advancing technology, calling for new talents.\n","\n","In the 20th century, a young man was supposed to choose a skill, vocation or profession, master it through education and practice, and then earn a living from it till he or she retired. However, the fast-changing nature of technology is making skills obsolete at a higher rate than ever before. To survive, tomorrow young man must keep re-inventing himself and updating his skills continuously. Life could be difficult if every new skill has a shelf life of only a decade or so.\n","Or perhaps one could look at it the other way — and say that changing technology will keep human beings on their toes throughout their life.\n","\n","Technology is the result of human inventiveness. It reflects our evolutionary heritage. We are neither strong like gorillas or tigers, nor fast like cheetahs and hawks, but our brains and thinking powers have given us the greatest edge of any species on the planet. Technology is a result.\n","Technology is either inherently good or bad; it is how we use it that makes it so. The splitting of a hydrogen atom is technology at work. 
As history has shown us, technology can equally be used to make a nuclear bomb that kills millions — or generate electricity that lights up a million homes.\n","\"\"\""],"metadata":{"id":"wvJHUeTJsiC7","executionInfo":{"status":"ok","timestamp":1672490871113,"user_tz":-60,"elapsed":250,"user":{"displayName":"Graverman","userId":"06659155231973912985"}}},"execution_count":58,"outputs":[]},{"cell_type":"code","execution_count":9,"metadata":{"id":"_ttU0Ma8p1_U","executionInfo":{"status":"ok","timestamp":1672487908938,"user_tz":-60,"elapsed":5,"user":{"displayName":"Graverman","userId":"06659155231973912985"}}},"outputs":[],"source":["instructions = []"]},{"cell_type":"code","source":["# Make stucture error (shuffle one paragraph with another)\n","essay_paragraphs = essay.split('\\n\\n')\n","\n","rand1 = random.randint(0, len(essay_paragraphs) - 1)\n","rand2 = random.randint(0, len(essay_paragraphs) - 1)\n","\n","temp = essay_paragraphs[rand1]\n","essay_paragraphs[rand1] = essay_paragraphs[rand2]\n","essay_paragraphs[rand2] = temp\n","\n","essay = \"\"\n","for i in essay_paragraphs:\n"," essay += i\n"," essay += \"\\n\\n\"\n","\n","instructions.append(\"Fix structure errors in this essay\")"],"metadata":{"id":"Evaej8oH8VLH","executionInfo":{"status":"ok","timestamp":1672490937384,"user_tz":-60,"elapsed":232,"user":{"displayName":"Graverman","userId":"06659155231973912985"}}},"execution_count":64,"outputs":[]},{"cell_type":"code","source":["# Make grammar erros (more like: change random words into words of similar meaning)\n","import nltk\n","from nltk.corpus import wordnet\n","import random\n","\n","essay_words = essay.split()\n","\n","for i in range(len(essay_words)):\n"," if random.randint(0, 100) < 30:\n"," suggestion = []\n"," for syn in wordnet.synsets(essay_words[i]):\n"," for l in syn.lemmas():\n"," suggestion.append(l.name())\n"," if suggestion != []:\n"," essay_words[i] = suggestion[random.randint(0, len(suggestion) - 1)]\n","\n","essay = \"\"\n","for i in 
essay_words:\n"," essay += i\n"," essay += \" \"\n","\n","\n","instructions.append(\"Fix grammar errors in this essay\")"],"metadata":{"id":"HhJXyfy-2OmT","executionInfo":{"status":"ok","timestamp":1672490091374,"user_tz":-60,"elapsed":257,"user":{"displayName":"Graverman","userId":"06659155231973912985"}}},"execution_count":43,"outputs":[]},{"cell_type":"code","source":["# Make typos\n","import string\n","import random\n","\n","# you can change the number 60 to change how much corrupted this essay will be\n","for i in range(len(essay) // 60):\n"," rand = random.randint(0, len(essay))\n"," essay = essay[:rand] + random.choice(string.ascii_letters) + essay[rand+1:]\n","\n","instructions.append(\"Fix typing errors in this essay\")"],"metadata":{"id":"delvA6xEzNwV","executionInfo":{"status":"ok","timestamp":1672490096010,"user_tz":-60,"elapsed":231,"user":{"displayName":"Graverman","userId":"06659155231973912985"}}},"execution_count":44,"outputs":[]},{"cell_type":"code","source":["# Prints intrcutions (final step)\n","for i in instructions:\n"," print(i)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4XLAXom_zGsR","executionInfo":{"status":"ok","timestamp":1672484222869,"user_tz":-60,"elapsed":364,"user":{"displayName":"Graverman","userId":"06659155231973912985"}},"outputId":"b741c776-41af-4ad5-8ab7-1825b19018ab"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":["Fix typing errors in this essay\n"]}]}]} \ No newline at end of file +{"cells":[{"cell_type":"markdown","metadata":{"id":"o0lAqmWhsiUe"},"source":["#Essay Revision\n","The goal of this notebook is to use data argumentation to have data on improving essays. 
The way this is done is by taking a template \"good\" essay and making step by step changes that make it worse and add instructions on how to fix it."]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":240,"status":"ok","timestamp":1672489678465,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"AFUIjc7xw25A","outputId":"01c13cd7-7252-4948-fd9a-f36919f2214b"},"outputs":[{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package wordnet to\n","[nltk_data] C:\\Users\\Chandru\\AppData\\Roaming\\nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to\n","[nltk_data] C:\\Users\\Chandru\\AppData\\Roaming\\nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n"]}],"source":["import nltk\n","nltk.download('wordnet')\n","nltk.download('omw-1.4')\n","import random"]},{"cell_type":"markdown","metadata":{"id":"EcDYv9cnv18v"},"source":["Put your essay here, [source of the essay used ](https://www.thewisdompost.com/essay/technology-essay/3387#essay-on-technology-for-college-and-university-students-essay-2-750-words)"]},{"cell_type":"code","execution_count":6,"metadata":{"executionInfo":{"elapsed":250,"status":"ok","timestamp":1672490871113,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"wvJHUeTJsiC7"},"outputs":[],"source":["essay = \"\"\"\n","We live in a world driven by technology — hardly anyone would argue with you if you said this. Technology, literally meaning the “science of craft”, refers to the collection of techniques, skills, methods, and processes used to produce goods or services or for accomplishing objectives such as scientific investigation. 
Technology can be embedded in machines enabling them to be used by people even without a detailed knowledge of their inner workings.\n","Technological growth is closely linked to the expansion of scientific research and knowledge. In the last 50 years, thanks to the exponential increases in computing power and microchip design and manufacture, there has been unprecedented innovation and technological growth in nearly every field of human endeavour from health and transport to industrial production and education.\n","\n","It is automotive technology that drives today’s electric and hybrid cars, and which will drive tomorrow’s driverless cars, hover-taxis and space cabs.\n","It is technology that drives the ubiquitous mobile phones that you will now find in the hands of even the poorest of the world’s poor. It is technology that creates hybrid seeds that resist inhospitable climatic conditions and difficult terrain, giving high yields in shorter times.\n","It is advancing medical technology that makes remote surgery, minimally invasive surgery and life-saving cures using stem cell transplants. Technology puts spacecrafts on asteroids and distant planets and lets us see new worlds. Technology splits atoms, revealing their secrets, and gives us ways to exploit them to create energy, quantum storage for data, and virtual reality games.\n","\n","There are people who strongly oppose technology and claim that it spells the death of ‘humanity’, and that we are approaching the day when machines will rule everything. They refer to fans of technology as ‘techies’ or sometimes ‘geeks’. On the other hand, proponents of technology call these people Luddites, a derogatory name for someone who is opposed to industrialisation, automation, computerisation and new technologies in general.\n","Is this true? Is technology really a curse disguised as a blessing? 
Many believe that the convergence of biotechnology and AI might be the most consequential development of all.\n","\n","In the last five decades, two areas in particular have grown faster than the rest, powered by research and advances in computing power. One is artificial intelligence, or AI; the other is biotechnology. Huge benefits have emerged from each of them for human beings in general, such as self-driving cars — which will dramatically reduce the death rate from road accidents — and robotic surgery, which enables precise, highly efficient and targeted surgical interventions.\n","Yet, visionaries like Yuval Noah Harari, author of the best-selling Homo sapiens and Deus, are now warning that the convergence of biotechnology and AI will irreversibly and unpredictably change both the quality of human life and its challenges in the next few decades. A good example of this is the facial recognition technology that is now present in all photo management programs. The AI in the software is capable of not only spotting the faces in every photograph but also recognising the person by name.\n","This technology has now expanded so that photo apps can recognise cats, dogs, beaches, mountains and cars too. Computers with AI are already correctly identifying human emotions through observing facial expressions and body movements. Some robots are able to mimic human emotions. This is called affective computing, sometimes called artificial emotional intelligence, and refers to the study and development of systems and devices that can recognize, interpret, process, and simulate human affects.\n","\n","The ability to read human emotions is just a step away from predicting human emotions. 
For example, if a computer attached to a video camera could identify which products a consumer is showing greater interest in or which ones he is really keen to buy, various tactics could be used to influence her to buy it.\n","Activists worry that computers that can understand and anticipate human wishes and desires by scanning their irises and analysing their micro-expressions could also be programmed to exploit and manipulate them.\n","Another very real fear is that humanoid computers with human-like skin, speech, and expressions could jeopardise and dehumanise relationship and create emotional vacuums.\n","\n","An enduring fear of Luddites has always been that computers will rob humans of their livelihood by taking their jobs and doing them more efficiently at lower cost. However, in reality the exact opposite has happened. As computerised machines began taking over mechanical and repetitive human activities, new jobs for people opened up that needs thinking and analytical skills and judgement, or human interpersonal skills. A good example is the worldwide proliferation of call centres.\n","When drones were invented many feared that pilots would soon be redundant. However, few people know that it takes almost 30 people to fly one military drone, and an additional 50 people to analyze and make sense of the data being streamed back by the drone.\n","The US army suffers from a serious shortage of trained, high quality drone pilots; anyone who masters this skill will have a job. But a social scientist warns that in 10 years, it is certain that computers will be flying that drone and humans will be redundant. Equally sure is that some brand new skill requirement will have opened up with advancing technology, calling for new talents.\n","\n","In the 20th century, a young man was supposed to choose a skill, vocation or profession, master it through education and practice, and then earn a living from it till he or she retired. 
However, the fast-changing nature of technology is making skills obsolete at a higher rate than ever before. To survive, tomorrow young man must keep re-inventing himself and updating his skills continuously. Life could be difficult if every new skill has a shelf life of only a decade or so.\n","Or perhaps one could look at it the other way — and say that changing technology will keep human beings on their toes throughout their life.\n","\n","Technology is the result of human inventiveness. It reflects our evolutionary heritage. We are neither strong like gorillas or tigers, nor fast like cheetahs and hawks, but our brains and thinking powers have given us the greatest edge of any species on the planet. Technology is a result.\n","Technology is either inherently good or bad; it is how we use it that makes it so. The splitting of a hydrogen atom is technology at work. As history has shown us, technology can equally be used to make a nuclear bomb that kills millions — or generate electricity that lights up a million homes.\n","\"\"\""]},{"cell_type":"code","execution_count":7,"metadata":{"executionInfo":{"elapsed":5,"status":"ok","timestamp":1672487908938,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"_ttU0Ma8p1_U"},"outputs":[],"source":["instructions = []"]},{"cell_type":"code","execution_count":8,"metadata":{"executionInfo":{"elapsed":232,"status":"ok","timestamp":1672490937384,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"Evaej8oH8VLH"},"outputs":[],"source":["# Make structure error (shuffle one paragraph with another)\n","essay_paragraphs = essay.split('\\n\\n') # Split the essay into paragraphs on blank lines (\\n\\n)\n","\n","rand1 = random.randint(0, len(essay_paragraphs) - 1)\n","rand2 = random.randint(0, len(essay_paragraphs) - 1)\n","\n","temp = essay_paragraphs[rand1]\n","essay_paragraphs[rand1] = essay_paragraphs[rand2]\n","essay_paragraphs[rand2] = temp\n","\n","essay = \"\"\n","for 
i in essay_paragraphs:\n"," essay += i\n"," essay += \"\\n\\n\"\n","\n","instructions.append(\"Fix structure errors in this essay\")"]},{"cell_type":"code","execution_count":9,"metadata":{"executionInfo":{"elapsed":257,"status":"ok","timestamp":1672490091374,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"HhJXyfy-2OmT"},"outputs":[],"source":["# Make grammar errors (more like: change random words into words of similar meaning)\n","import nltk\n","from nltk.corpus import wordnet\n","import random\n","\n","essay_words = essay.split()\n","\n","for i in range(len(essay_words)):\n"," if random.randint(0, 100) < 30:\n"," suggestion = []\n"," for syn in wordnet.synsets(essay_words[i]):\n"," for l in syn.lemmas():\n"," suggestion.append(l.name())\n"," if suggestion != []:\n"," essay_words[i] = suggestion[random.randint(0, len(suggestion) - 1)]\n","\n","essay = \"\"\n","for i in essay_words:\n"," essay += i\n"," essay += \" \"\n","\n","\n","instructions.append(\"Fix grammar errors in this essay\")"]},{"cell_type":"code","execution_count":14,"metadata":{"executionInfo":{"elapsed":231,"status":"ok","timestamp":1672490096010,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"delvA6xEzNwV"},"outputs":[],"source":["# Make typos\n","import string\n","import random\n","\n","# change the number 60 to control how heavily the essay is corrupted (a smaller divisor means more typos)\n","for i in range(len(essay) // 60):\n"," rand = random.randint(0, len(essay))\n"," essay = essay[:rand] + random.choice(string.ascii_letters) + essay[rand+1:]\n","\n","instructions.append(\"Fix typing errors in this 
essay\")"]},{"cell_type":"code","execution_count":15,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":364,"status":"ok","timestamp":1672484222869,"user":{"displayName":"Graverman","userId":"06659155231973912985"},"user_tz":-60},"id":"4XLAXom_zGsR","outputId":"b741c776-41af-4ad5-8ab7-1825b19018ab"},"outputs":[{"name":"stdout","output_type":"stream","text":["Fix typing errors in this essay\n"]}],"source":["# Prints instructions (final step)\n","for i in instructions:\n"," print(i)\n","instructions.clear()"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":[]}],"metadata":{"colab":{"authorship_tag":"ABX9TyO8HHo9/NuZY8QnCvjrXaYb","provenance":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.4"},"vscode":{"interpreter":{"hash":"492d89208e1af30f4727fd53e254ea56e6b1a843b376782bfa5f6ce13d676265"}}},"nbformat":4,"nbformat_minor":0} From 527aca676f74441c96f302b9725b4f56698278a4 Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Tue, 3 Jan 2023 08:36:27 +0900 Subject: [PATCH 04/34] linting the copilot configs --- .pre-commit-config.yaml | 2 +- backend/oasst_backend/config.py | 1 + copilot/api/addons/api-cluster.yml | 161 +++++++++++++++++++++++++++++ copilot/api/manifest.yml | 38 +++++++ copilot/web/manifest.yml | 2 +- docker-compose.yaml | 2 +- docker/Dockerfile.backend | 1 + 7 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 copilot/api/addons/api-cluster.yml create mode 100644 copilot/api/manifest.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c32ca7c8..756f58f8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ 
repos: # and which break the standard YAML check. The alternative would be to # skip any unsafe errors (and thus break YAML compatibility) or use # some other checker that may not work in general. - exclude: "^copilot/web/addons/.*$" + exclude: "^copilot/.*/addons/.*$" - id: check-json - id: check-case-conflict - id: detect-private-key diff --git a/backend/oasst_backend/config.py b/backend/oasst_backend/config.py index 602780be..dc602c8b 100644 --- a/backend/oasst_backend/config.py +++ b/backend/oasst_backend/config.py @@ -44,4 +44,5 @@ class Settings(BaseSettings): raise ValueError(v) +print("cats") settings = Settings(_env_file=".env") diff --git a/copilot/api/addons/api-cluster.yml b/copilot/api/addons/api-cluster.yml new file mode 100644 index 00000000..c4a17848 --- /dev/null +++ b/copilot/api/addons/api-cluster.yml @@ -0,0 +1,161 @@ +Parameters: + App: + Type: String + Description: Your application's name. + Env: + Type: String + Description: + The environment name your service, job, or workflow is being deployed to. + Name: + Type: String + Description: The name of the service, job, or workflow being deployed. + # Customize your Aurora Serverless cluster by setting the default value of the following parameters. + apiclusterDBName: + Type: String + Description: + The name of the initial database to be created in the Aurora Serverless v2 + cluster. 
+ Default: oassist_api + # Cannot have special characters + # Naming constraints: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_Limits.html#RDS_Limits.Constraints +Mappings: + apiclusterEnvScalingConfigurationMap: + staging: + "DBMinCapacity": 0.5 # AllowedValues: from 0.5 through 128 + "DBMaxCapacity": 8 # AllowedValues: from 0.5 through 128 + + All: + "DBMinCapacity": 0.5 # AllowedValues: from 0.5 through 128 + "DBMaxCapacity": 8 # AllowedValues: from 0.5 through 128 + +Resources: + apiclusterDBSubnetGroup: + Type: "AWS::RDS::DBSubnetGroup" + Properties: + DBSubnetGroupDescription: + Group of Copilot private subnets for Aurora Serverless v2 cluster. + SubnetIds: + !Split [",", { "Fn::ImportValue": !Sub "${App}-${Env}-PrivateSubnets" }] + apiclusterSecurityGroup: + Metadata: + "aws:copilot:description": + "A security group for your workload to access the Aurora Serverless v2 + cluster apicluster" + Type: "AWS::EC2::SecurityGroup" + Properties: + GroupDescription: + !Sub "The Security Group for ${Name} to access Aurora Serverless v2 + cluster apicluster." + VpcId: + Fn::ImportValue: !Sub "${App}-${Env}-VpcId" + Tags: + - Key: Name + Value: !Sub "copilot-${App}-${Env}-${Name}-Aurora" + apiclusterDBClusterSecurityGroup: + Metadata: + "aws:copilot:description": + "A security group for your Aurora Serverless v2 cluster apicluster" + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: The Security Group for the Aurora Serverless v2 cluster. + SecurityGroupIngress: + - ToPort: 5432 + FromPort: 5432 + IpProtocol: tcp + Description: + !Sub "From the Aurora Security Group of the workload ${Name}." 
+ SourceSecurityGroupId: !Ref apiclusterSecurityGroup + VpcId: + Fn::ImportValue: !Sub "${App}-${Env}-VpcId" + apiclusterAuroraSecret: + Metadata: + "aws:copilot:description": + "A Secrets Manager secret to store your DB credentials" + Type: AWS::SecretsManager::Secret + Properties: + Description: !Sub Aurora main user secret for ${AWS::StackName} + GenerateSecretString: + SecretStringTemplate: '{"username": "postgres"}' + GenerateStringKey: "password" + ExcludePunctuation: true + IncludeSpace: false + PasswordLength: 16 + apiclusterDBClusterParameterGroup: + Metadata: + "aws:copilot:description": + "A DB parameter group for engine configuration values" + Type: "AWS::RDS::DBClusterParameterGroup" + Properties: + Description: !Ref "AWS::StackName" + Family: "aurora-postgresql14" + Parameters: + client_encoding: "UTF8" + apiclusterDBCluster: + Metadata: + "aws:copilot:description": + "The apicluster Aurora Serverless v2 database cluster" + Type: "AWS::RDS::DBCluster" + Properties: + MasterUsername: + !Join [ + "", + [ + "{{resolve:secretsmanager:", + !Ref apiclusterAuroraSecret, + ":SecretString:username}}", + ], + ] + MasterUserPassword: + !Join [ + "", + [ + "{{resolve:secretsmanager:", + !Ref apiclusterAuroraSecret, + ":SecretString:password}}", + ], + ] + DatabaseName: !Ref apiclusterDBName + Engine: "aurora-postgresql" + EngineVersion: "14.4" + DBClusterParameterGroupName: !Ref apiclusterDBClusterParameterGroup + DBSubnetGroupName: !Ref apiclusterDBSubnetGroup + Port: 5432 + VpcSecurityGroupIds: + - !Ref apiclusterDBClusterSecurityGroup + ServerlessV2ScalingConfiguration: + # Replace "All" below with "!Ref Env" to set different autoscaling limits per environment. 
+ MinCapacity: + !FindInMap [apiclusterEnvScalingConfigurationMap, All, DBMinCapacity] + MaxCapacity: + !FindInMap [apiclusterEnvScalingConfigurationMap, All, DBMaxCapacity] + apiclusterDBWriterInstance: + Metadata: + "aws:copilot:description": + "The apicluster Aurora Serverless v2 writer instance" + Type: "AWS::RDS::DBInstance" + Properties: + DBClusterIdentifier: !Ref apiclusterDBCluster + DBInstanceClass: db.serverless + Engine: "aurora-postgresql" + PromotionTier: 1 + AvailabilityZone: !Select + - 0 + - !GetAZs + Ref: AWS::Region + + apiclusterSecretAuroraClusterAttachment: + Type: AWS::SecretsManager::SecretTargetAttachment + Properties: + SecretId: !Ref apiclusterAuroraSecret + TargetId: !Ref apiclusterDBCluster + TargetType: AWS::RDS::DBCluster +Outputs: + apiclusterSecret: # injected as APICLUSTER_SECRET environment variable by Copilot. + Description: + "The JSON secret that holds the database username and password. Fields are + 'host', 'port', 'dbname', 'username', 'password', 'dbClusterIdentifier' + and 'engine'" + Value: !Ref apiclusterAuroraSecret + apiclusterSecurityGroup: + Description: "The security group to attach to the workload." + Value: !Ref apiclusterSecurityGroup diff --git a/copilot/api/manifest.yml b/copilot/api/manifest.yml new file mode 100644 index 00000000..b9262b51 --- /dev/null +++ b/copilot/api/manifest.yml @@ -0,0 +1,38 @@ +# The manifest for the "api" service. +# Read the full specification for the "Load Balanced Web Service" type at: +# https://aws.github.io/copilot-cli/docs/manifest/lb-web-service/ + +name: api +type: Load Balanced Web Service + +http: + path: "/" + healthcheck: + path: "/docs" + +image: + build: + dockerfile: docker/Dockerfile.backend + context: ./ + port: 8080 + +cpu: 256 +memory: 512 +platform: linux/x86_64 +count: 1 +exec: true +network: + connect: true + +environments: + staging: + variables: + # Note: this has to be a valid JSON list for Pydantic to parse it. 
+ BACKEND_CORS_ORIGINS: '["https://web.staging.open-assistant.surfacedata.org"]' + DEBUG_ALLOW_ANY_API_KEY: True + DEBUG_SKIP_API_KEY_CHECK: True + MAX_WORKERS: 1 + +secrets: + # Note: URI, not URL. + DATABASE_URI: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/API_DATABASE_URL diff --git a/copilot/web/manifest.yml b/copilot/web/manifest.yml index 18df80c1..aadc3297 100644 --- a/copilot/web/manifest.yml +++ b/copilot/web/manifest.yml @@ -26,6 +26,7 @@ environments: staging: variables: NEXTAUTH_URL: https://web.staging.open-assistant.surfacedata.org + FASTAPI_URL: https://api.staging.open-assistant.surfacedata.org secrets: DATABASE_URL: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/DATABASE_URL @@ -37,5 +38,4 @@ secrets: EMAIL_SERVER_USER: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/EMAIL_SERVER_USER EMAIL_FROM: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/EMAIL_FROM FASTAPI_KEY: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/FASTAPI_KEY - FASTAPI_URL: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/FASTAPI_URL NEXTAUTH_SECRET: /copilot/${COPILOT_APPLICATION_NAME}/${COPILOT_ENVIRONMENT_NAME}/secrets/NEXTAUTH_SECRET diff --git a/docker-compose.yaml b/docker-compose.yaml index ed72c820..4309db65 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -91,8 +91,8 @@ services: environment: - POSTGRES_HOST=db - DEBUG_SKIP_API_KEY_CHECK=True - - DEBUG_USE_SEED_DATA=True - MAX_WORKERS=1 + #- DEBUG_USE_SEED_DATA=True depends_on: db: condition: service_healthy diff --git a/docker/Dockerfile.backend b/docker/Dockerfile.backend index d9458ae0..1f3bdfcd 100644 --- a/docker/Dockerfile.backend +++ b/docker/Dockerfile.backend @@ -5,6 +5,7 @@ COPY ./backend/requirements.txt /app/requirements.txt RUN pip install --no-cache-dir --upgrade -r /app/requirements.txt ENV PORT 8080 +EXPOSE 8080 COPY ./oasst-shared 
/oasst-shared RUN pip install -e /oasst-shared From 7f1644e38d301ee2bea0178062e7f0f8177a911c Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Tue, 3 Jan 2023 08:40:59 +0900 Subject: [PATCH 05/34] Factoring out the re-used layout --- website/src/components/Layout.tsx | 8 ++++++++ website/src/pages/index.tsx | 9 ++------- website/src/pages/privacy-policy.tsx | 9 ++------- website/src/pages/terms-of-service.tsx | 9 ++------- 4 files changed, 14 insertions(+), 21 deletions(-) diff --git a/website/src/components/Layout.tsx b/website/src/components/Layout.tsx index 5f6f66b4..3564d765 100644 --- a/website/src/components/Layout.tsx +++ b/website/src/components/Layout.tsx @@ -17,4 +17,12 @@ export const getDefaultLayout = (page: React.ReactElement) => ( ); +export const getTransparentHeaderLayout = (page: React.ReactElement) => ( +
+
+ {page} +
+
+); + export const noLayout = (page: React.ReactElement) => page; diff --git a/website/src/pages/index.tsx b/website/src/pages/index.tsx index 8c2c34b5..20a4068c 100644 --- a/website/src/pages/index.tsx +++ b/website/src/pages/index.tsx @@ -5,6 +5,7 @@ import { Faq } from "src/components/Faq"; import { Footer } from "src/components/Footer"; import { Header } from "src/components/Header"; import { Hero } from "src/components/Hero"; +import { getTransparentHeaderLayout } from "src/components/Layout"; import { TaskSelection } from "src/components/TaskSelection"; const Home = () => { @@ -34,12 +35,6 @@ const Home = () => { ); }; -Home.getLayout = (page) => ( -
-
- {page} -
-
-); +Home.getLayout = getTransparentHeaderLayout; export default Home; diff --git a/website/src/pages/privacy-policy.tsx b/website/src/pages/privacy-policy.tsx index 42f439fc..dcb3bc19 100644 --- a/website/src/pages/privacy-policy.tsx +++ b/website/src/pages/privacy-policy.tsx @@ -2,6 +2,7 @@ import { Container, Heading } from "@chakra-ui/react"; import Head from "next/head"; import { Footer } from "src/components/Footer"; import { Header } from "src/components/Header"; +import { getTransparentHeaderLayout } from "src/components/Layout"; const PrivacyPolicy = () => { return ( @@ -403,12 +404,6 @@ const PrivacyPolicy = () => { ); }; -PrivacyPolicy.getLayout = (page) => ( -
-
- {page} -
-
-); +PrivacyPolicy.getLayout = getTransparentHeaderLayout; export default PrivacyPolicy; diff --git a/website/src/pages/terms-of-service.tsx b/website/src/pages/terms-of-service.tsx index b2d668a5..d97c8d34 100644 --- a/website/src/pages/terms-of-service.tsx +++ b/website/src/pages/terms-of-service.tsx @@ -2,6 +2,7 @@ import { Container, Heading } from "@chakra-ui/react"; import Head from "next/head"; import { Footer } from "src/components/Footer"; import { Header } from "src/components/Header"; +import { getTransparentHeaderLayout } from "src/components/Layout"; const TermsOfService = () => { return ( @@ -148,12 +149,6 @@ const TermsOfService = () => { ); }; -TermsOfService.getLayout = (page) => ( -
-
- {page} -
-
-); +TermsOfService.getLayout = getTransparentHeaderLayout; export default TermsOfService; From 57d3b92fa4fdd83d29ec194bd00a5ca3b3015a87 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Mon, 2 Jan 2023 17:31:36 -0800 Subject: [PATCH 06/34] pre-commit hook black->black-jupyter --- .pre-commit-config.yaml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6d885cdc..329918ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,10 +26,7 @@ # # /WARNING! -exclude: "build|stubs|^bot/templates/|^notebooks/.*\\.ipynb$" - -default_language_version: - python: python3 +exclude: build|stubs|^bot/templates/|^notebooks/.*\\.ipynb$ repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -42,12 +39,12 @@ repos: # and which break the standard YAML check. The alternative would be to # skip any unsafe errors (and thus break YAML compatibility) or use # some other checker that may not work in general. 
- exclude: "^copilot/web/addons/.*$" + exclude: ^copilot/web/addons/.*$ - id: check-json - id: check-case-conflict - id: detect-private-key - id: fix-encoding-pragma - args: ["--remove"] + args: [--remove] - id: forbid-submodules - id: mixed-line-ending - id: requirements-txt-fixer @@ -57,13 +54,13 @@ repos: - id: check-symlinks - id: check-merge-conflict - id: check-added-large-files - args: ["--maxkb=1024"] + args: [--maxkb=1024] - id: end-of-file-fixer - repo: https://github.com/psf/black rev: 22.12.0 hooks: - - id: black + - id: black-jupyter - repo: https://github.com/pycqa/flake8 rev: 6.0.0 @@ -79,7 +76,7 @@ repos: rev: v2.7.1 hooks: - id: prettier - args: ["--prose-wrap=always", "--write"] + args: [--prose-wrap=always, --write] - repo: local hooks: From dbfa77e8b794d4f151fd2e7c2fa6586934b7f638 Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Tue, 3 Jan 2023 15:01:24 +0900 Subject: [PATCH 07/34] Fixing some small merge issues and ensure docker runs the backend properly --- docker-compose.yaml | 2 +- website/src/components/Footer.tsx | 11 ----------- website/src/components/Header/Header.tsx | 4 ---- website/src/pages/api/new_task/[task_type].ts | 4 +--- website/src/pages/api/update_task.ts | 6 +----- website/src/pages/auth/signin.tsx | 11 ++--------- 6 files changed, 5 insertions(+), 33 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index dc147c73..6bc42c51 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -9,7 +9,7 @@ services: # Use `docker compose up frontend-dev --build --attach-dependencies` to start all services needed to work on the frontend. frontend-dev: image: sverrirab/sleep - depends_on: [db, webdb, adminer, maildev, backend] + depends_on: [db, webdb, adminer, maildev, backend, redis] # This DB is for the FastAPI Backend. 
db: diff --git a/website/src/components/Footer.tsx b/website/src/components/Footer.tsx index cadae07e..fc88368e 100644 --- a/website/src/components/Footer.tsx +++ b/website/src/components/Footer.tsx @@ -22,17 +22,6 @@ export function Footer() {