Lora fine-tuning taking too long

# Question
Any reason why this is giving me a month of expected processing time?
More importantly, how to speed this up?
My dataset is a collection of 20k short sentences (max 100 words each).

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import transformers
import torch

model_id = "tiiuae/falcon-40b-instruct"

# 4-bit NF4 quantization with double quantization and bf16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map="auto", trust_remote_code=True)

from peft import prepare_model_for_kbit_training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

from peft import LoraConfig, get_peft_model

# LoRA adapters on the fused attention projection only
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_data,  # pre-tokenized dataset, built elsewhere (not shown in the question)
    args=transformers.TrainingArguments(
        num_train_epochs=100,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        warmup_ratio=0.05,
        learning_rate=2e-4,
        fp16=False,
        logging_steps=1,
        output_dir="output",
        optim="paged_adamw_8bit",
        lr_scheduler_type='cosine',
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
```
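For context, a rough back-of-the-envelope count of the optimizer steps these settings imply (a sketch assuming a single device, no sequence packing, and the 20k-example dataset described above):

```python
# Rough step count implied by the TrainingArguments above.
# Assumptions: one GPU, 20,000 training examples, every example used each epoch.
num_examples = 20_000
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
num_train_epochs = 100

effective_batch = per_device_train_batch_size * gradient_accumulation_steps  # 16
steps_per_epoch = num_examples // effective_batch                            # 1250
total_steps = steps_per_epoch * num_train_epochs                             # 125000

print(f"~{total_steps:,} optimizer steps")
```

At tens of seconds per step, which is plausible for a 4-bit 40B model with gradient checkpointing, 125,000 steps lands in the range of weeks, in line with the month-long ETA.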
# Answer 1
**Score**: 1

I think the reason is that the GPU is not being used; if you have one, you can enable it and do the fine-tuning with that.
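One quick way to check that in the setup above (a minimal sketch; `model` is the object loaded with `device_map="auto"` in the question):

```python
import torch

# Is a CUDA device visible to PyTorch at all?
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())

# With device_map="auto", transformers records where each submodule was placed.
# If most entries read "cpu" or "disk", the 40B weights did not fit on the GPU(s),
# layers are being offloaded, and training will be extremely slow.
print(getattr(model, "hf_device_map", "no hf_device_map attribute"))
```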