diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index de06c7ba..ff34b62c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-exclude: "build|stubs|^bot/templates/"
+exclude: "build|stubs|^bot/templates/|openassistant/templates"
 
 default_language_version:
   python: python3
diff --git a/docs/datasets/README.md b/docs/datasets/README.md
index e7cb264e..08c5fd13 100644
--- a/docs/datasets/README.md
+++ b/docs/datasets/README.md
@@ -11,11 +11,13 @@ and then running:
 ```python
 from datasets import load_dataset
 
-dataset = load_dataset("OpenAssistant/<dataset-name>")
+dataset = load_dataset("OpenAssistant/{dataset-name}")
 ```
 
 See the instructions below if you'd like to contribute a new dataset to the project.
 
+## Adding a new dataset
+
 ## Uploading a dataset to the Hugging Face Hub
 
 Adding a new dataset for the OpenAssistant project typically involves the following steps:
@@ -29,8 +31,8 @@ Adding a new dataset for the OpenAssistant project typically involves the follow
 
 To upload a dataset to the OpenAssistant organization, you first need to:
 
-* Create a [Hugging Face account](https://huggingface.co/join) (it's free)
-* Join the [OpenAssistant organization](https://huggingface.co/OpenAssistant) by clicking on the _Request to join this org_ button on the top right-hand side
+- Create a [Hugging Face account](https://huggingface.co/join) (it's free)
+- Join the [OpenAssistant organization](https://huggingface.co/OpenAssistant) by clicking on the _Request to join this org_ button on the top right-hand side
 
 By default, your [role](https://huggingface.co/docs/hub/organizations-security#access-control-in-organizations) in the organization is `contributor`, which gives you write access to any datasets that you create (and only those). If you'd like to make changes to other datasets, [open a discussion or Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions).
 
@@ -76,8 +78,8 @@ python -m pip install datasets
 
 We've created a [Gradio application](https://huggingface.co/spaces/OpenAssistant/dataset-generator) on Hugging Face Spaces that will create a new dataset repository for you with the following template files:
 
-* A dataset loading script
-* A dataset card
+- A dataset loading script
+- A dataset card
 
 Simply provide the name of the new dataset and your access token from Step 1, and you're good to go!
 
@@ -97,5 +99,4 @@ from datasets import load_dataset
 load_dataset("OpenAssistant/my_dataset")
 ```
 
-Congratulations - you've now added a dataset to the Hub!
-
+Congratulations - you've now added a dataset to the OpenAssistant org!
diff --git a/openassistant/__init__.py b/openassistant/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/openassistant/templates/README.md b/openassistant/templates/README.md
new file mode 100644
index 00000000..b7899eb7
--- /dev/null
+++ b/openassistant/templates/README.md
@@ -0,0 +1,31 @@
+# Dataset instructions for {dataset_name}
+
+## Setup
+
+```bash
+python -m pip install -r requirements.txt
+```
+
+## Download
+
+```bash
+python download.py
+```
+
+## Preprocess
+
+```bash
+python preprocess.py
+```
+
+## Load
+
+```bash
+python load.py
+```
+
+## Upload
+
+```bash
+python upload.py
+```
diff --git a/openassistant/templates/requirements.txt b/openassistant/templates/requirements.txt
new file mode 100644
index 00000000..74fb5166
--- /dev/null
+++ b/openassistant/templates/requirements.txt
@@ -0,0 +1,2 @@
+datasets>=2.8
+typer
diff --git a/openassistant/templates/template.py b/openassistant/templates/template.py
new file mode 100644
index 00000000..b6c52871
--- /dev/null
+++ b/openassistant/templates/template.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+# Copyright 2023 The OpenAssistant Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This template serves as a starting point for contributing a dataset to the OpenAssistant repo.
+
+When modifying it for your dataset, look for TODO items that offer specific instructions.
+
+To create a dataset loading script you will create a class and implement 3 methods:
+  * `_info`: Establishes the schema for the dataset, and returns a datasets.DatasetInfo object.
+  * `_split_generators`: Downloads and extracts data for each split (e.g. train/val/test) or associates local data with each split.
+  * `_generate_examples`: Creates examples from data on disk that conform to each schema defined in `_info`.
+
+Full documentation on writing dataset loading scripts can be found here:
+https://huggingface.co/docs/datasets/dataset_script
+
+This template is adapted from the one provided by BigScience's BigBIO library:
+https://github.com/bigscience-workshop/biomedical/blob/main/templates/template.py
+
+TODO: Before submitting your script, delete this docstring and replace it with a description of your dataset.
+"""
+
+import os
+from typing import Dict, List, Tuple
+
+import datasets
+
+from openassistant.utils.configs import OpenAssistantConfig
+
+# TODO: import the schema that fits your dataset:
+from openassistant.utils.schemas import
+
+# TODO: Add BibTeX citation where appropriate
+_CITATION = """\
+@article{,
+  author    = {},
+  title     = {},
+  journal   = {},
+  volume    = {},
+  year      = {},
+  url       = {},
+  doi       = {},
+  biburl    = {},
+  bibsource = {}
+}
+"""
+
+# TODO: create a module level variable with your dataset name (should match the script name)
+#  E.g. The Pile: [dataset_name] --> the_pile
+_DATASETNAME = "[dataset_name]"
+# TODO: create a pretty display name for your dataset
+_DISPLAYNAME = "Dataset Name"
+
+# TODO: Add a description of the dataset here
+# You can copy an official description
+_DESCRIPTION = """\
+This dataset is designed for XXX NLP task.
+"""
+
+# TODO: Add a link to an official homepage for the dataset here (if possible)
+_HOMEPAGE = ""
+
+# TODO: Add the licence for the dataset here (if possible)
+# Note that this doesn't have to be a common open source license.
+# Some datasets have custom licenses. In this case, simply put the full license terms
+# into `_LICENSE`
+_LICENSE = ""
+
+# TODO: Add links to the URLs needed to download your dataset files.
+# This variable can be a relative path for datasets whose files need to be
+# manually downloaded or preprocessed in advance.
+
+# For publicly available datasets you will most likely end up passing these URLs to dl_manager in _split_generators.
+# However, if you need to access different files for each config you can have multiple entries in this dict.
+# This can be an arbitrarily nested dict/list of URLs (see below in `_split_generators` method)
+_URLS = {
+    _DATASETNAME: "url or list of urls or relative path like ./data ",
+}
+
+# TODO: add supported task by dataset. One dataset may support multiple tasks
+_SUPPORTED_TASKS = []  # example: [Tasks.TRANSLATION, Tasks.NAMED_ENTITY_RECOGNITION, Tasks.RELATION_EXTRACTION]
+
+# TODO: set this to a version that is associated with the dataset. if none exists use "1.0.0"
+#  This version doesn't have to be consistent with semantic versioning. Anything that is
+#  provided by the original dataset as a version goes.
+_VERSION = ""
+
+
+# TODO: Name the dataset class to match the script name using CamelCase instead of snake_case
+#  Append "Dataset" to the class name: ThePile --> ThePileDataset
+class NewDataset(datasets.GeneratorBasedBuilder):
+    """TODO: Short description of my dataset."""
+
+    VERSION = datasets.Version(_VERSION)
+
+    # You will be able to load each dataset with
+    # dataset = datasets.load_dataset('my_dataset')
+
+    # TODO: For each dataset, implement a config for each subset;
+    #  If a dataset contains more than one subset, implement a config for EACH of them.
+    #  Each of them should contain:
+    #   - name: should be unique for each dataset config eg. the_pile_[schema_name]
+    #   - version: VERSION
+    #   - description: one line description for the dataset
+    #   - schema: open_assistant_[schema_name]
+    #   - subset_id: subset id is the canonical name for the dataset (eg. the_pile)
+    #  where [schema_name] = (language_modeling)
+
+    BUILDER_CONFIGS = [
+        OpenAssistantConfig(
+            name=f"{_DATASETNAME}_[schema_name]",
+            version=VERSION,
+            description=f"OpenAssistant dataset config for {_DATASETNAME}",
+            schema_name="[schema_name]",
+            subset_id=_DATASETNAME,
+        )
+    ]
+
+    DEFAULT_CONFIG_NAME = _DATASETNAME
+
+    def _info(self) -> datasets.DatasetInfo:
+        # TODO: Implement the schema for your dataset here.
+        raise NotImplementedError()
+
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=features,
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager) -> List[datasets.SplitGenerator]:
+        """Returns SplitGenerators."""
+        # TODO: This method is tasked with downloading/extracting the data and defining the splits depending on the configuration
+
+        # If you need to access a config choice, that will be in self.config.name
+
+        # dl_manager is a datasets.download.DownloadManager that can be used to download and extract URLs; many examples use the download_and_extract method; see the DownloadManager docs here: https://huggingface.co/docs/datasets/package_reference/builder_classes.html#datasets.DownloadManager
+
+        # dl_manager can accept any type of nested list/dict and will give back the same structure with the url replaced with the path to local files.
+
+        urls = _URLS[_DATASETNAME]
+        data_dir = dl_manager.download_and_extract(urls)
+
+        # Not all datasets have predefined canonical train/val/test splits.
+        # If your dataset has no predefined splits, use datasets.Split.TRAIN for all of the data.
+
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                # Whatever you put in gen_kwargs will be passed to _generate_examples
+                gen_kwargs={
+                    "filepath": os.path.join(data_dir, "train.jsonl"),
+                    "split": "train",
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST,
+                gen_kwargs={
+                    "filepath": os.path.join(data_dir, "test.jsonl"),
+                    "split": "test",
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.VALIDATION,
+                gen_kwargs={
+                    "filepath": os.path.join(data_dir, "dev.jsonl"),
+                    "split": "dev",
+                },
+            ),
+        ]
+
+    # method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
+
+    # TODO: change the args of this function to match the keys in `gen_kwargs`. You may add any necessary kwargs.
+
+    def _generate_examples(self, filepath, split: str) -> Tuple[int, Dict]:
+        """Yields examples as (key, example) tuples."""
+        # TODO: This method handles input defined in _split_generators to yield (key, example) tuples from the dataset.
+
+        # The `key` is for legacy reasons (tfds) and is not important in itself, but must be unique for each example.
+
+        # NOTE: For local datasets you will have access to self.config.data_dir and self.config.data_files
+
+        if self.config.schema == "[schema_name]":
+            # TODO: yield (key, example) tuples in the given schema
+            for key, example in thing:
+                yield key, example
+
+# This allows you to run your dataloader with `python [dataset_name].py` during development
+# TODO: Remove this before making your PR
+if __name__ == "__main__":
+    datasets.load_dataset(__file__)
diff --git a/openassistant/utils/__init__.py b/openassistant/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/openassistant/utils/configs.py b/openassistant/utils/configs.py
new file mode 100644
index 00000000..2391f66b
--- /dev/null
+++ b/openassistant/utils/configs.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+from dataclasses import dataclass
+
+import datasets
+
+
+@dataclass
+class OpenAssistantConfig(datasets.BuilderConfig):
+    """BuilderConfig for OpenAssistant datasets."""
+
+    name: str = None
+    version: datasets.Version = None
+    description: str = None
+    schema: str = None
+    subset_id: str = None
diff --git a/openassistant/utils/schemas/__init__.py b/openassistant/utils/schemas/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/openassistant/utils/schemas/language_modeling.py b/openassistant/utils/schemas/language_modeling.py
new file mode 100644
index 00000000..0b2072a5
--- /dev/null
+++ b/openassistant/utils/schemas/language_modeling.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+"""
+Language Modeling Schema
+"""
+import datasets
+
+features = datasets.Features(
+    {
+        "text": datasets.Value("string"),
+        "meta": [datasets.Value("string")],
+    }
+)