diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py index d8fb60d7..8ff4f9e7 100644 --- a/model/reward/instructor/experimental_dataset.py +++ b/model/reward/instructor/experimental_dataset.py @@ -60,7 +60,7 @@ class HFSummaryQuality(Dataset): def __init__(self, split, tokenizer, max_length=300) -> None: super().__init__() assert split in ("validation", "test") - dataset = load_dataset("Tristan/summarize_from_feedback", "axis")[split] + dataset = load_dataset("openai/summarize_from_feedback", "axis")[split] self.max_length = max_length mean_scores = defaultdict(list) self.contexts = [] diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index f63af85a..a5c4b4fd 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -118,7 +118,7 @@ class HFSummary(Dataset): self.index2summary = {} self.max_comparison_per_sample = max_comparison_per_sample major_split = split if "train" == split else "validation" - dataset = load_dataset("Tristan/summarize_from_feedback", "comparisons")[major_split] + dataset = load_dataset("openai/summarize_from_feedback", "comparisons")[major_split] for data in dataset: if ( "extra" in data