Chatbot
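Fine-tune google/flan-t5-large into a simple chatbot using LoRA adapters from the PEFT library, then run inference with the trained adapter. First, check which GPU the Colab runtime has assigned: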
In [ ]:
! nvidia-smi
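Mount Google Drive so the trained adapter can be backed up at the end: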
In [ ]:
from google.colab import drive
drive.mount('/content/drive')
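Install the Hugging Face libraries: transformers for the model, datasets for data handling, accelerate for device placement, and peft for LoRA: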
In [ ]:
! pip install transformers datasets accelerate peft
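Import everything needed for tokenization, LoRA configuration, and sequence-to-sequence training: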
In [ ]:
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
    DataCollatorForSeq2Seq,
)
from peft import LoraConfig, get_peft_model, TaskType
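Load the train and test splits. The Parquet files are expected to contain Human (prompt) and Assistant (response) columns, matching the keys used in the preprocessing step below: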
In [ ]:
train_df = pd.read_parquet("train.parquet")
test_df = pd.read_parquet("test.parquet")
train_data = Dataset.from_pandas(train_df)
test_data = Dataset.from_pandas(test_df)
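Load the base model and its tokenizer: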
In [ ]:
model_id="google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
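Tokenize the prompts as inputs and the responses as labels, both truncated or padded to 256 tokens. Pad-token positions in the labels are replaced with -100 so the cross-entropy loss ignores them: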
In [ ]:
def preprocess_function(sample, padding="max_length"):
    # Tokenize the prompts (inputs) and responses (labels)
    model_inputs = tokenizer(sample["Human"], max_length=256, padding=padding, truncation=True)
    labels = tokenizer(sample["Assistant"], max_length=256, padding=padding, truncation=True)
    # Replace pad tokens in the labels with -100 so they are ignored by the loss
    if padding == "max_length":
        labels["input_ids"] = [
            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
        ]
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs
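Apply the preprocessing to both splits, dropping the original text columns: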
In [ ]:
train_tokenized_dataset = train_data.map(preprocess_function, batched=True, remove_columns=train_data.column_names)
test_tokenized_dataset = test_data.map(preprocess_function, batched=True, remove_columns=test_data.column_names)
print(f"Keys of tokenized dataset: {list(train_tokenized_dataset.features)}")
In [ ]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)
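Wrap the base model with the adapters. print_trainable_parameters shows how small the trainable fraction is; with these settings it should be well under 1% of the model's parameters: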
In [ ]:
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
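The data collator pads each batch dynamically and uses -100 as the label padding id, consistent with the preprocessing above; pad_to_multiple_of=8 helps tensor-core efficiency: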
In [ ]:
label_pad_token_id = -100
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    model=model,
    label_pad_token_id=label_pad_token_id,
    pad_to_multiple_of=8,
)
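Log in to the Hugging Face Hub; this is required because training and saving below use push_to_hub=True: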
In [ ]:
from huggingface_hub import notebook_login
notebook_login()
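Define the training arguments, train for one epoch, and push the LoRA adapter and tokenizer to the Hub. Note that only train_dataset is passed, so no evaluation runs during training: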
In [ ]:
output_dir="lora-flan-t5-large-chat"
training_args = Seq2SeqTrainingArguments(
output_dir=output_dir,
per_device_train_batch_size=4,
learning_rate=1e-3,
num_train_epochs=1,
logging_dir=f"{output_dir}/logs",
logging_strategy="epoch",
save_strategy="epoch",
report_to="tensorboard",
push_to_hub = True
)
trainer = Seq2SeqTrainer(
model=model,
args=training_args,
data_collator=data_collator,
train_dataset=train_tokenized_dataset,
)
model.config.use_cache = False
trainer.train()
peft_save_model_id="lora-flan-t5-large-chat"
trainer.model.save_pretrained(peft_save_model_id, push_to_hub=True)
tokenizer.save_pretrained(peft_save_model_id, push_to_hub=True)
trainer.model.base_model.save_pretrained(peft_save_model_id, push_to_hub=True)
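Copy the saved adapter from the Colab filesystem to Google Drive as a backup: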
In [ ]:
! cp -r /content/lora-flan-t5-large-chat/ /content/drive/MyDrive/Chatbot/
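Reload everything for inference: the PEFT config points at the base model, which is loaded fresh and then combined with the LoRA adapter. Sampling with top_p=0.9 generates the reply: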
In [ ]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load the PEFT config to find the base model the adapter was trained on
peft_model_id = "lora-flan-t5-large-chat"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Attach the LoRA adapter and place the model on GPU 0
model = PeftModel.from_pretrained(model, peft_model_id, device_map={"": 0})
model.eval()

sample = "Human: \nExplain how Artificial Intelligence works. \nAssistant: "
input_ids = tokenizer(sample, return_tensors="pt", truncation=True, max_length=256).input_ids.cuda()
outputs = model.generate(input_ids=input_ids, do_sample=True, top_p=0.9, max_length=256)
print(sample)
print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0])
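For deployment you may prefer a single standalone checkpoint instead of base model plus adapter. A minimal sketch, assuming the PeftModel from the cell above is still loaded (merge_and_unload folds the LoRA weights into the base model; the merged_dir path is illustrative):
In [ ]:
# Fold the LoRA weights into the base model and save a standalone copy
merged_model = model.merge_and_unload()
merged_dir = "flan-t5-large-chat-merged"  # illustrative output path
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)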