from transformers import AutoModelForSequenceClassification
from fastai.text.all import *
from fastai.callback.wandb import *
from fasthugs.learner import TransLearner
from fasthugs.data import TransformersTextBlock, TextGetter, get_splits
from datasets import load_dataset, concatenate_datasets
import wandb
import gc
%env WANDB_ENTITY=fastai_community
%env WANDB_PROJECT=glue-benchmark
Let's define the main settings for the run in one place:
# Dataset family and pretrained checkpoint shared by every section below.
ds_name = 'glue'
model_name = "distilroberta-base"

# NOTE(review): max_len is not referenced in the visible code — presumably a
# tokenizer length cap; confirm before relying on it.
max_len = 512

# Batch sizes: validation batches are twice as large as training batches.
bs = 32
val_bs = 2 * bs

# Training schedule and optimizer.
n_epoch = 4
lr = 2e-5
opt_func = Adam

# A falsy value (0) leaves discriminative learning rates switched off.
diff_lr_decay_factor = 0

# Task names accepted by validate_task().
GLUE_TASKS = [
    "cola", "mnli", "mnli-mm", "mrpc", "qnli",
    "qqp", "rte", "sst2", "stsb", "wnli",
]
def validate_task():
    """Check that the module-level `task` names a known GLUE task.

    Reads the globals `task` and `GLUE_TASKS`; takes no arguments and returns
    nothing.

    Raises:
        ValueError: if `task` is not listed in `GLUE_TASKS`.
    """
    # An explicit raise (instead of `assert`) keeps the check active under
    # `python -O` and tells the user which names are accepted.
    if task not in GLUE_TASKS:
        raise ValueError(
            f"Unknown GLUE task {task!r}; expected one of {GLUE_TASKS}"
        )
from fastai.metrics import MatthewsCorrCoef, F1Score, PearsonCorrCoef, SpearmanCorrCoef
# Metrics reported for each GLUE task. The first entry of a task's list is the
# one the training sections use to derive `metric_to_monitor`.
glue_metrics = {
    'cola': [MatthewsCorrCoef()],
    'sst2': [accuracy],
    'mrpc': [F1Score(), accuracy],
    'stsb': [PearsonCorrCoef(), SpearmanCorrCoef()],
    'qqp': [F1Score(), accuracy],
    'mnli': [accuracy],
    'qnli': [accuracy],
    'rte': [accuracy],
    'wnli': [accuracy],
}
# Names of the text columns for each GLUE task, as [first, second]; the second
# entry is None for single-sentence tasks.
glue_textfields = {
    'cola': ['sentence', None],
    'sst2': ['sentence', None],
    'mrpc': ['sentence1', 'sentence2'],
    'stsb': ['sentence1', 'sentence2'],
    'qqp': ['question1', 'question2'],
    'mnli': ['premise', 'hypothesis'],
    'qnli': ['question', 'sentence'],
    'rte': ['sentence1', 'sentence2'],
    'wnli': ['sentence1', 'sentence2'],
}

# Label counts for the tasks that do not use the default of 2, which callers
# supply through `glue_num_labels.get(task, 2)`.
glue_num_labels = {
    'mnli': 3,
    'stsb': 1,
}
def layerwise_splitter(model):
    """Split `model` into layer-wise parameter groups.

    The groups are, in order: the base model's embeddings, each child of the
    base model's encoder layer stack, and every top-level child of `model`
    after the first for which `params(...)` is non-empty.

    Returns the groups mapped through `params`, i.e. one parameter collection
    per group.
    """
    backbone = model.base_model
    embedding_group = L(backbone.embeddings)
    encoder_groups = L(backbone.encoder.layer.children())
    # Skip the first top-level child and keep only the remaining children
    # that actually own parameters.
    head_groups = L(
        child for child in list(model.children())[1:] if params(child)
    )
    return (embedding_group + encoder_groups + head_groups).map(params)
# Choose the GLUE task for this section and fail fast on an unknown name.
task = 'sst2'
validate_task()
ds = load_dataset(ds_name, task)

# MNLI has no plain 'validation' split; its matched split is spelled with an
# underscore ('validation_matched'), the same name the MNLI section of this
# file uses. The previous hyphenated spelling would fail on lookup.
valid_ = 'validation_matched' if task == 'mnli' else 'validation'
len(ds['train']), len(ds[valid_])

# Train and validation rows are concatenated into a single dataset; the index
# lists returned by get_splits() are used later to separate them again.
train_idx, valid_idx = get_splits(ds, valid=valid_)
train_ds = concatenate_datasets([ds['train'], ds[valid_]])
train_ds[0]
Here I use the number of characters as a proxy for the length of the tokenized text to speed up dls
creation.
# Per-example character count summed over the task's text fields (None
# placeholders are skipped); the source columns are dropped so only 'len' remains.
lens = train_ds.map(
    lambda example: {
        'len': sum(len(example[field]) for field in glue_textfields[task] if field)
    },
    remove_columns=train_ds.column_names,
    num_proc=2,
    keep_in_memory=True,
)
train_lens = lens.select(train_idx)['len']
valid_lens = lens.select(valid_idx)['len']

# Inputs come from the task's text columns, targets from the 'label' column;
# the validation rows are picked out by index.
dblock = DataBlock(
    blocks=[
        TransformersTextBlock(pretrained_model_name=model_name),
        CategoryBlock(),
    ],
    get_x=TextGetter(*glue_textfields[task]),
    get_y=ItemGetter('label'),
    splitter=IndexSplitter(valid_idx),
)
%%time
# One kwargs dict per dataloader, carrying the precomputed character lengths
# for the training ('res') and validation ('val_res') sets.
# NOTE(review): presumably consumed by the length-sorted dataloaders so items
# need not be measured again — confirm against fasthugs/fastai.
dl_kwargs = [
    {'res': train_lens},
    {'val_res': valid_lens},
]
dls = dblock.dataloaders(
    train_ds,
    bs=bs,
    val_bs=val_bs,
    dl_kwargs=dl_kwargs,
)
dls.show_batch(max_n=4)
# Run metadata for Weights & Biases: the group name extends the run name with
# the learning rate and, when enabled, the discriminative-lr decay factor.
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{WANDB_NAME}-{lr:.0e}'
if diff_lr_decay_factor:
    GROUP += f"diff_lr_{diff_lr_decay_factor}"
NOTES = f'finetuning {model_name} with {opt_func.__name__} lr={lr:.0e}'
TAGS = [model_name, ds_name, opt_func.__name__]

wandb.init(
    reinit=True,
    project="fasthugs",
    entity="fastai_community",
    name=WANDB_NAME,
    group=GROUP,
    notes=NOTES,
    tags=TAGS,
);
# Build the classifier; tasks missing from glue_num_labels default to 2 labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=glue_num_labels.get(task, 2)
)
metrics = glue_metrics[task]
learn = TransLearner(
    dls, model, metrics=metrics, opt_func=opt_func, splitter=layerwise_splitter
).to_fp16()

# Keep the base `lr` setting as a plain float and derive the value actually
# passed to the fit in a separate name: rebinding `lr` to a slice would break a
# re-run of this cell (slice * float) and the `{lr:.0e}` formatting elsewhere.
fit_lr = lr
if diff_lr_decay_factor != 0:
    # One decay step per parameter group produced by the splitter.
    k = len(layerwise_splitter(model))
    fit_lr = slice(lr * diff_lr_decay_factor**k, lr)

# The first metric of the task is the one SaveModelCallback tracks.
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__
cbs = [
    WandbCallback(log_preds=False, log_model=False),
    SaveModelCallback(monitor=metric_to_monitor),
]
# Use the shared n_epoch setting rather than a hard-coded epoch count.
learn.fit_one_cycle(n_epoch, fit_lr, cbs=cbs)
learn.show_results()
# preds = learn.get_preds(dl=test_dl)

# Drop the learner and release cached GPU memory before the next section.
del learn
gc.collect()
torch.cuda.empty_cache()
# Log in to Weights & Biases before the sweep below is created and run.
wandb.login()
def train():
    """Run one W&B sweep trial: fine-tune `model_name` on the current `task`.

    Hyperparameters (`lr`, `wd`, `diff_lr_decay_factor`) are read from the
    config of the run opened by `wandb.init()`. `dls`, `task`, `model_name`
    and `n_epoch` are read from module globals. Returns nothing.
    """
    with wandb.init() as run:
        cfg = run.config
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=glue_num_labels.get(task, 2)
        )
        metrics = glue_metrics[task]

        # Previously the slice was computed but `cfg.lr` was still passed to
        # fit_one_cycle, so the swept decay factor never affected training.
        if cfg.diff_lr_decay_factor:
            # One decay step per parameter group produced by the splitter.
            k = len(layerwise_splitter(model))
            lr = slice(cfg.lr * cfg.diff_lr_decay_factor**k, cfg.lr)
        else:
            lr = cfg.lr

        learn = TransLearner(
            dls, model, metrics=metrics, opt_func=Adam, splitter=layerwise_splitter
        ).to_fp16()
        learn.fit_one_cycle(
            n_epoch,
            lr,
            wd=cfg.wd,
            cbs=[WandbCallback(log_preds=False, log_model=False)],
        )

        # Drop the learner and release cached GPU memory so consecutive sweep
        # trials start from a clean state.
        del learn
        gc.collect()
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
# The sweep optimises the first metric registered for the current task.
metrics = glue_metrics[task]
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__

sweep_name = f"glue-{task}-sweep"
sweep_config = {
    "project": "glue-benchmark",
    # Fixed typo ("fastai_cimmunity") so the config agrees with the entity
    # passed to wandb.sweep() below.
    "entity": "fastai_community",
    "name": sweep_name,
    "method": "random",
    "parameters": {
        "lr": {"values": [1e-5, 2e-5, 3e-5, 5e-5, 1e-4, 3e-4]},
        "wd": {"values": [0., 1e-2, 5e-2]},
        # 0. is falsy, so train() uses a single learning rate for that value.
        "diff_lr_decay_factor": {"values": [0., 0.9, 0.8, 0.7, 0.6]},
    },
    # W&B accepts "minimize" / "maximize" here; the British spelling
    # "maximise" is not a recognised goal.
    "metric": {"goal": "maximize", "name": metric_to_monitor},
    "early_terminate": {"type": "hyperband", "s": 2, "eta": 3, "max_iter": 40},
}

sweep_id = wandb.sweep(sweep_config, project='glue-benchmark', entity="fastai_community")
wandb.agent(sweep_id, function=train)
wandb.finish()
# Switch to MNLI and rebuild the combined train + matched-validation dataset.
task = 'mnli'
validate_task()
ds = load_dataset(ds_name, task)
train_idx, valid_idx = get_splits(ds, valid='validation_matched')
train_ds = concatenate_datasets([ds['train'], ds['validation_matched']])
train_ds[0]

# Combined character count of premise and hypothesis for every example; the
# source columns are dropped so only 'len' remains.
lens = train_ds.map(
    lambda example: {'len': len(example['premise']) + len(example['hypothesis'])},
    remove_columns=train_ds.column_names,
    num_proc=4,
    keep_in_memory=True,
)
train_lens = lens.select(train_idx)['len']
valid_lens = lens.select(valid_idx)['len']

# Premise/hypothesis pairs as input, the 'label' column as target; validation
# rows are picked out by index.
dblock = DataBlock(
    blocks=[
        TransformersTextBlock(pretrained_model_name=model_name),
        CategoryBlock(),
    ],
    get_x=TextGetter('premise', 'hypothesis'),
    get_y=ItemGetter('label'),
    splitter=IndexSplitter(valid_idx),
)
%%time
# One kwargs dict per dataloader, carrying the precomputed character lengths
# for the training ('res') and validation ('val_res') sets.
dl_kwargs = [
    {'res': train_lens},
    {'val_res': valid_lens},
]
dls = dblock.dataloaders(
    train_ds,
    bs=bs,
    val_bs=val_bs,
    dl_kwargs=dl_kwargs,
    num_workers=4,
)
dls.show_batch(max_n=4)
# Hyperparameters for the MNLI run.
lr = 5e-5
wd = 0.01

# Run metadata for Weights & Biases; the group name is the run name plus the
# learning rate.
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{WANDB_NAME}-{lr:.0e}'
NOTES = f'finetuning {model_name} with Adam lr={lr:.0e}'
TAGS = [model_name, ds_name, 'adam']

wandb.init(
    reinit=True,
    project="glue-benchmark",
    entity="fastai_community",
    name=WANDB_NAME,
    group=GROUP,
    notes=NOTES,
    tags=TAGS,
);
# Take the label count from the shared table (3 for 'mnli') instead of
# repeating the literal here, matching how the other sections build the model.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=glue_num_labels.get(task, 2)
)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()

# Only needed by the SaveModelCallback line that is currently commented out.
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__
cbs = [WandbCallback(log_preds=False, log_model=False)]
# cbs += [SaveModelCallback(monitor=metric_to_monitor)]

# Use the shared n_epoch setting rather than a hard-coded epoch count.
learn.fit_one_cycle(n_epoch, lr, wd=wd, cbs=cbs)
learn.show_results()

# Also evaluate on the mismatched validation split, which was not part of the
# dataloaders built above.
valid_mm_dl = dls.test_dl(ds['validation_mismatched'], with_labels=True)
learn.validate(dl=valid_mm_dl)