mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
add docstring info about tokenizer matching
This commit is contained in:
@@ -36,8 +36,11 @@ TOKENIZER_CONFIGS = {
|
||||
|
||||
|
||||
def match_tokenizer_name(model_name: str) -> TokenizerConfig:
|
||||
"""Match a partial model name to a tokenizer configuration"""
|
||||
tokenizer_config_matches = [config for name, config in TOKENIZER_CONFIGS.items() if model_name in name]
|
||||
"""
|
||||
Match a partial model name to a tokenizer configuration
|
||||
e.g. model_name `Salesforce/codegen-2B-multi` matches config name `codegen`
|
||||
"""
|
||||
tokenizer_config_matches = [config for name, config in TOKENIZER_CONFIGS.items() if name in model_name]
|
||||
if not tokenizer_config_matches:
|
||||
raise ValueError(f"Cannot find any tokeniser configuration to match {model_name=}")
|
||||
elif 1 < len(tokenizer_config_matches):
|
||||
|
||||
Reference in New Issue
Block a user