Files
SimPO/upload.ipynb
T
2024-10-01 20:31:48 +00:00

1545 lines
31 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Upload a model checkpoint to the Hugging Face Hub\n",
"\n",
"Creates the model repo `wassname/llama-3-2-1b-sft` and uploads the local checkpoint folder `/workspace/checkpoints_new/llama-3-2-1b-sft` to it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from huggingface_hub import HfApi\n",
"\n",
"# No token is passed here, so the client presumably relies on credentials\n",
"# configured outside this notebook (e.g. `huggingface-cli login` or HF_TOKEN).\n",
"api = HfApi()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RepoUrl('https://huggingface.co/wassname/llama-3-2-1b-sft', endpoint='https://huggingface.co', repo_type='model', repo_id='wassname/llama-3-2-1b-sft')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create the target model repo on the Hub.\n",
"# exist_ok=True keeps this cell re-runnable: without it, running the cell again\n",
"# after the repo has been created raises an error instead of returning the URL.\n",
"api.create_repo(\n",
"    repo_id=\"wassname/llama-3-2-1b-sft\",\n",
"    repo_type=\"model\",\n",
"    exist_ok=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Upload 10 LFS files: 0%| | 0/10 [00:00<?, ?it/s]\n",
"scheduler.pt: 100%|██████████| 1.06k/1.06k [00:00<00:00, 3.81kB/s]\n",
"rng_state.pth: 100%|██████████| 14.2k/14.2k [00:00<00:00, 49.7kB/s]\n",
"zero_pp_rank_0_mp_rank_00_model_states.pt: 100%|██████████| 76.3k/76.3k [00:00<00:00, 221kB/s]\n",
"training_args.bin: 100%|██████████| 7.03k/7.03k [00:00<00:00, 39.2kB/s]\n",
"tokenizer.json: 100%|██████████| 17.2M/17.2M [00:01<00:00, 9.38MB/s]\n",
"tokenizer.json: 100%|██████████| 17.2M/17.2M [00:01<00:00, 11.4MB/s]\n",
"training_args.bin: 100%|██████████| 7.03k/7.03k [00:00<00:00, 30.1kB/s]\n",
"model.safetensors: 100%|██████████| 2.47G/2.47G [01:13<00:00, 33.7MB/s]\n",
"model.safetensors: 100%|██████████| 2.47G/2.47G [01:18<00:00, 31.5MB/s]\n",
"bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt: 100%|██████████| 14.8G/14.8G [06:58<00:00, 35.5MB/s]\n",
"Upload 10 LFS files: 100%|██████████| 10/10 [06:58<00:00, 41.90s/it] \n"
]
},
{
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/wassname/llama-3-2-1b-sft/commit/07f81f552794458685788579cc84929475422cf3', commit_message='Upload folder using huggingface_hub', commit_description='', oid='07f81f552794458685788579cc84929475422cf3', pr_url=None, repo_url=RepoUrl('https://huggingface.co/wassname/llama-3-2-1b-sft', endpoint='https://huggingface.co', repo_type='model', repo_id='wassname/llama-3-2-1b-sft'), pr_revision=None, pr_num=None)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Upload the contents of the local checkpoint folder to the Hub model repo.\n",
"# The call's return value (a CommitInfo) is displayed as the cell output.\n",
"# NOTE(review): the recorded run pushed a 14.8G *_optim_states.pt file along with\n",
"# scheduler/RNG state. If only the model weights and tokenizer are wanted,\n",
"# consider passing ignore_patterns= to skip them - confirm the intent first.\n",
"api.upload_folder(\n",
"    repo_id=\"wassname/llama-3-2-1b-sft\",\n",
"    repo_type=\"model\",\n",
"    folder_path=\"/workspace/checkpoints_new/llama-3-2-1b-sft\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}