Unsloth is a framework for fine-tuning and reinforcement learning (RL) of large language models ([[LLM]]s). It advertises fine-tuning and inference that runs 2× to 30× faster than conventional approaches such as FlashAttention 2 ("FA2"), with a significantly reduced memory footprint (VRAM), for example through 4-bit quantization (QLoRA). Transformer-style models are supported, including speech, text, and multimodal models. [[https://github.com/unslothai/unsloth|GitHub repo]], [[https://unsloth.ai/|Homepage]]
An Nvidia GPU with [[CUDA]] is required.
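A quick sanity check that PyTorch can see the GPU (a generic PyTorch check, not Unsloth-specific):
python -c "import torch; print(torch.cuda.is_available(), torch.version.cuda)"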
Installation
wget -qO- https://raw.githubusercontent.com/unslothai/unsloth/main/unsloth/_auto_install.py | python -
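Alternatively, a plain pip install is documented as well (assuming a suitable PyTorch/CUDA environment is already in place):
pip install unsloth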
Docker
# Requires NVIDIA Container Toolkit
docker run -d -e JUPYTER_PASSWORD="mypassword" \
-p 8888:8888 -p 2222:22 \
-v $(pwd)/work:/workspace/work \
--gpus all \
unsloth/unsloth
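Jupyter should then be reachable at http://localhost:8888 (password as set via JUPYTER_PASSWORD); host port 2222 forwards to the container's SSH port 22, and the local work/ directory is mounted at /workspace/work inside the container.
The following Python example performs QLoRA fine-tuning (4-bit LoRA) using the SFTTrainer from TRL: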
from unsloth import FastLanguageModel, FastModel
import torch
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
max_seq_length = 2048 # Supports RoPE Scaling internally, so choose any!
# Get LAION dataset
url = "https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl"
dataset = load_dataset("json", data_files = {"train" : url}, split = "train")
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
"unsloth/gpt-oss-20b-unsloth-bnb-4bit", #or choose any model
] # More models at https://huggingface.co/unsloth
model, tokenizer = FastModel.from_pretrained(
model_name = "unsloth/gpt-oss-20b",
max_seq_length = 2048, # Choose any for long context!
load_in_4bit = True, # 4-bit quantization. False = 16-bit LoRA.
load_in_8bit = False, # 8-bit quantization
load_in_16bit = False, # [NEW!] 16-bit LoRA
full_finetuning = False, # Set to True for full fine-tuning.
# token = "hf_...", # use one if using gated models
)
# Do model patching and add fast LoRA weights
model = FastLanguageModel.get_peft_model(
model,
r = 16,
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",],
lora_alpha = 16,
lora_dropout = 0, # Supports any, but = 0 is optimized
bias = "none", # Supports any, but = "none" is optimized
# [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
random_state = 3407,
max_seq_length = max_seq_length,
use_rslora = False, # We support rank stabilized LoRA
loftq_config = None, # And LoftQ
)
trainer = SFTTrainer(
model = model,
train_dataset = dataset,
tokenizer = tokenizer,
args = SFTConfig(
max_seq_length = max_seq_length,
per_device_train_batch_size = 2,
gradient_accumulation_steps = 4,
warmup_steps = 10,
max_steps = 60,
logging_steps = 1,
output_dir = "outputs",
optim = "adamw_8bit",
seed = 3407,
),
)
trainer.train()
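After training, the LoRA adapters can be saved and the model queried right away. A minimal sketch following the usual Unsloth/PEFT pattern (the output path and the prompt are illustrative, not part of the original example):
# Save only the small LoRA adapter weights plus the tokenizer
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")
# Switch Unsloth into its faster inference mode and generate a short sample
FastLanguageModel.for_inference(model)
inputs = tokenizer("What is the capital of France?", return_tensors = "pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens = 64)
print(tokenizer.decode(outputs[0], skip_special_tokens = True))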
Video: [[https://www.youtube.com/watch?v=jFl5Fewrieo|EASIEST Way to Train LLMs w/ unsloth]]