HF inference fixes

This commit is contained in:
Yannic Kilcher
2023-02-09 00:27:51 +01:00
parent 4f821f0f8b
commit 27671e3220
2 changed files with 3 additions and 2 deletions
+2 -1
View File
@@ -54,7 +54,7 @@ def main(
"top_p": work_request.top_p,
"temperature": work_request.temperature,
"seed": work_request.seed,
# "stop": ["User:", "Assistant:"], # TODO: this doesn't work... why?
# "stop": ["\nUser:", "\nAssistant:"], # TODO: make this a bit more workable because it's multiple tokens
},
},
stream=True,
@@ -64,6 +64,7 @@ def main(
response.raise_for_status()
except requests.HTTPError:
logger.exception("Failed to get response from inference server")
logger.error(f"Response: {response.text}")
return
client = sseclient.SSEClient(response)
@@ -13,7 +13,7 @@ class WorkRequest(pydantic.BaseModel):
conversation: protocol.Conversation = pydantic.Field(..., repr=False)
model_name: str = "distilgpt2"
max_new_tokens: int = 100
seed: int = pydantic.Field(default_factory=lambda: random.randint(-(2**31), 2**31 - 1))
seed: int = pydantic.Field(default_factory=lambda: random.randint(0, 2**31 - 1))
do_sample: bool = True
top_k: int = 50
top_p: float = 0.9