DPO Part 2: DPO
This notebook takes the SFT SantaCoder-1B checkpoint from Part 1 and further tunes it with Direct Preference Optimization (DPO), using LoRA adapters and the Dahoas/full-hh-rlhf preference dataset.
In [ ]:
from google.colab import drive
drive.mount('/content/drive')
In [ ]:
# copy the SFT checkpoint from Part 1 out of Drive into the local Colab filesystem
! cp -r /content/drive/MyDrive/sft_santacoder1b /content/
In [ ]:
! pip install datasets peft transformers trl accelerate bitsandbytes
In [ ]:
from huggingface_hub import notebook_login
notebook_login()
In [ ]:
# 0. imports
import os

import torch
from datasets import load_dataset
from peft import AutoPeftModelForCausalLM, LoraConfig
from transformers import AutoTokenizer, TrainingArguments
from trl import DPOTrainer


def dpo_data(split):
    # Load the Anthropic HH preference data in the three-column format that
    # DPOTrainer expects: "prompt", "chosen" and "rejected".
    # NOTE: assumes the dataset exposes "train" and "test" splits.
    dataset = load_dataset(
        "Dahoas/full-hh-rlhf",
        split=split,
        use_auth_token=True,
    )
    original_columns = dataset.column_names

    def return_prompt_and_responses(samples):
        return {
            "prompt": samples["prompt"],
            "chosen": samples["chosen"],
            "rejected": samples["rejected"],
        }

    return dataset.map(
        return_prompt_and_responses,
        batched=True,
        remove_columns=original_columns,
    )
if __name__ == "__main__":
    # 1. load the SFT checkpoint as the policy model, plus a frozen copy as the
    #    DPO reference model (both in 4-bit to fit in Colab memory)
    model = AutoPeftModelForCausalLM.from_pretrained(
        "/content/sft_santacoder1b",
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )
    model.config.use_cache = False

    model_ref = AutoPeftModelForCausalLM.from_pretrained(
        "/content/sft_santacoder1b",
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )

    tokenizer = AutoTokenizer.from_pretrained("/content/sft_santacoder1b")
    tokenizer.pad_token = tokenizer.eos_token

    # 2. load the preference data and keep only pairs whose prompt + response
    #    stay within 256 characters
    train_dataset = dpo_data("train")
    train_dataset = train_dataset.filter(
        lambda x: len(x["prompt"]) + len(x["chosen"]) <= 256
        and len(x["prompt"]) + len(x["rejected"]) <= 256
    )

    eval_dataset = dpo_data("test")
    eval_dataset = eval_dataset.filter(
        lambda x: len(x["prompt"]) + len(x["chosen"]) <= 256
        and len(x["prompt"]) + len(x["rejected"]) <= 256
    )
    # 3. training arguments
    training_args = TrainingArguments(
        per_device_train_batch_size=2,
        max_steps=505,
        logging_steps=10,
        save_steps=500,
        gradient_accumulation_steps=4,
        gradient_checkpointing=True,
        learning_rate=2e-4,
        evaluation_strategy="steps",
        eval_steps=100,
        output_dir="dpo_santacoder1b",
        report_to="tensorboard",
        lr_scheduler_type="cosine",
        warmup_steps=2,
        optim="paged_adamw_32bit",
        fp16=True,
        remove_unused_columns=False,
        run_name="dpo_santacoder1b",
    )

    # 4. LoRA configuration for the DPO adapters
    peft_config = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["c_attn", "c_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )
    # 5. initialize the DPO trainer
    dpo_trainer = DPOTrainer(
        model,
        model_ref,
        args=training_args,
        beta=0.1,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        peft_config=peft_config,
        max_prompt_length=128,
        max_length=256,
    )

    # 6. train
    dpo_trainer.train()
    dpo_trainer.save_model("dpo_santacoder1b")

    # 7. save the final adapter checkpoint
    output_dir = os.path.join("dpo_santacoder1b", "final_checkpoint")
    dpo_trainer.model.save_pretrained(output_dir)
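After the cell above has run, the filtered datasets are still available in the notebook namespace, so it is easy to eyeball the three-column format that DPOTrainer consumes. A minimal sanity check (assumes the training cell above completed without errors):

In [ ]:
# optional: inspect one filtered preference pair and the dataset sizes
sample = train_dataset[0]
print("PROMPT:\n", sample["prompt"])
print("CHOSEN:\n", sample["chosen"])
print("REJECTED:\n", sample["rejected"])
print("train size:", len(train_dataset), "| eval size:", len(eval_dataset))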
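Because report_to="tensorboard" is set, the trainer logs the DPO losses and the rewards/chosen, rewards/rejected and rewards/margins metrics under the output directory. A minimal sketch for watching them from Colab, assuming the default Trainer behavior of writing TensorBoard event files under output_dir:

In [ ]:
# optional: monitor DPO training curves in TensorBoard
%load_ext tensorboard
%tensorboard --logdir dpo_santacoder1b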
In [ ]:
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# load the SFT base model in fp16 and apply the DPO LoRA adapters on top of it
model = AutoModelForCausalLM.from_pretrained(
    "/content/sft_santacoder1b/final_merged_checkpoint/",
    return_dict=True,
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(model, "/content/dpo_santacoder1b/final_checkpoint/")
model.eval()

# merge the adapters into the base weights so the model can be used without peft
model = model.merge_and_unload()
model.save_pretrained("/content/dpo_santacoder1b/final_merged_checkpoint")
tokenizer.save_pretrained("/content/dpo_santacoder1b/final_merged_checkpoint")

# push the merged model and tokenizer to the Hugging Face Hub
model.push_to_hub("dpo-santacoder1b")
tokenizer.push_to_hub("dpo-santacoder1b")
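To spot-check the DPO-tuned model, the merged checkpoint can be loaded back and prompted directly. A minimal sketch, assuming the merged checkpoint path saved above, a GPU runtime, and the "Human: ... Assistant:" prompt style of Dahoas/full-hh-rlhf; the prompt and generation settings are illustrative only, and trust_remote_code may additionally be needed on older transformers versions:

In [ ]:
# quick generation test with the merged DPO checkpoint (settings are illustrative)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "/content/dpo_santacoder1b/final_merged_checkpoint"
tok = AutoTokenizer.from_pretrained(ckpt)
mdl = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16).to("cuda")

prompt = "Human: How do I reverse a string in Python?\n\nAssistant:"
inputs = tok(prompt, return_tensors="pt").to("cuda")
with torch.no_grad():
    out = mdl.generate(**inputs, max_new_tokens=128, do_sample=True, top_p=0.9, temperature=0.7)
print(tok.decode(out[0], skip_special_tokens=True))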
In [ ]:
# back up the DPO outputs to Drive
! cp -r /content/dpo_santacoder1b /content/drive/MyDrive/SantaCoder-DPO