Platypus

Fine-tune Llama 2 7B on the garage-bAInd/Open-Platypus dataset with QLoRA: the base model is loaded in 4-bit NF4, LoRA adapters are trained on the MLP projections with TRL's SFTTrainer, and the resulting adapter is saved locally and pushed to the Hugging Face Hub.
In [ ]:
! pip install accelerate peft bitsandbytes transformers trl
Successfully installed accelerate-0.21.0 bitsandbytes-0.41.1 datasets-2.14.4 dill-0.3.7 huggingface-hub-0.16.4 multiprocess-0.70.15 peft-0.4.0 safetensors-0.3.2 tokenizers-0.13.3 transformers-4.31.0 trl-0.5.0 xxhash-3.3.0
In [ ]:
from huggingface_hub import notebook_login
notebook_login()
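Logging in is required here because meta-llama/Llama-2-7b-hf is a gated checkpoint (Meta's license must be accepted on the Hub), and push_to_hub later in the notebook needs a token with write access. As a non-interactive alternative to the widget, a minimal sketch that assumes the token is stored in an HF_TOKEN environment variable (the variable name is illustrative):

# Non-interactive login, assuming HF_TOKEN is set in the environment
import os
from huggingface_hub import login

login(token=os.environ["HF_TOKEN"])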
In [ ]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os


def platypus_training():
    # Load Open-Platypus and concatenate instruction + output into a single "text" column
    data = load_dataset("garage-bAInd/Open-Platypus", split="train")
    data_df = data.to_pandas()
    data_df["text"] = data_df[["instruction", "output"]].apply(
        lambda x: x["instruction"] + " " + x["output"], axis=1
    )
    data_df.drop(["instruction", "output"], axis=1, inplace=True)
    data = Dataset.from_pandas(data_df)

    # Llama 2 has no pad token, so reuse the EOS token for padding
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
    tokenizer.pad_token = tokenizer.eos_token

    # QLoRA: 4-bit NF4 quantization with double quantization and fp16 compute
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf", quantization_config=bnb_config, device_map={"": 0}
    )
    model.config.use_cache = False  # KV cache is only useful for generation, not training
    model.config.pretraining_tp = 1

    # LoRA adapters on the MLP projections only, as in the Platypus recipe
    peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        target_modules=["gate_proj", "down_proj", "up_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    training_arguments = TrainingArguments(
        output_dir="platypus_llama_7b",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,  # effective batch size of 32
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="steps",
        save_steps=50,
        save_total_limit=100,
        logging_steps=10,
        num_train_epochs=1,
        max_steps=110,  # max_steps takes precedence over num_train_epochs
        fp16=True,
        push_to_hub=True,
    )

    trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=512,
    )
    trainer.train()
    trainer.push_to_hub()
    trainer.save_model("platypus_llama_7b")

    # Also keep a copy of the adapter weights in a dedicated final_checkpoint folder
    output_dir = os.path.join("platypus_llama_7b", "final_checkpoint")
    trainer.model.save_pretrained(output_dir)


if __name__ == "__main__":
    platypus_training()
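The next cell copies the training output to Google Drive, which assumes Drive is already mounted in the Colab runtime. A minimal sketch of mounting it first (Colab-only, run before the copy):

# Mount Google Drive in the Colab runtime so /content/drive/MyDrive is available
from google.colab import drive

drive.mount("/content/drive")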
In [ ]:
! cp -r /content/platypus_llama_7b /content/drive/MyDrive/
In [ ]:
torch.cuda.empty_cache()
In [ ]:
# Optional post-processing: load the saved LoRA adapter with AutoPeftModelForCausalLM,
# merge it into the base weights, and save a standalone checkpoint for inference.
# Kept commented out; run it after freeing GPU memory from training.

# from peft import AutoPeftModelForCausalLM
# from transformers import BitsAndBytesConfig
# import os
# import torch

# model = AutoPeftModelForCausalLM.from_pretrained("/content/drive/MyDrive/platypus_llama_7b/final_checkpoint", device_map={"": 0})
# model = model.merge_and_unload()

# output_merged_dir = os.path.join("/content/drive/MyDrive/platypus_llama_7b", "final_merged_checkpoint")
# model.save_pretrained(output_merged_dir, safe_serialization=True)
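Once the merged checkpoint exists, it can be loaded like any other causal LM for a quick generation check. A minimal sketch, assuming the merged weights were saved to final_merged_checkpoint as above; the prompt and generation settings are illustrative:

# Quick generation check with the merged model (path and prompt are illustrative)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

merged_dir = "/content/drive/MyDrive/platypus_llama_7b/final_merged_checkpoint"
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained(merged_dir, torch_dtype=torch.float16, device_map={"": 0})

prompt = "What is the derivative of x^2?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))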