from transformers import AutoModelForSequenceClassification
from fastai.text.all import *
from fastai.callback.wandb import *

from fasthugs.learner import TransLearner
from fasthugs.data import TransformersTextBlock, TextGetter, get_splits

from datasets import load_dataset, concatenate_datasets

import wandb
import gc
%env WANDB_ENTITY=fastai_community
%env WANDB_PROJECT=glue-benchmark
env: WANDB_ENTITY=fastai_community
env: WANDB_PROJECT=glue-benchmark

Setup

Let's define the main settings for the run in one place:

# Dataset collection and pretrained checkpoint used by every cell below
ds_name = 'glue'
model_name = "distilroberta-base"

max_len = 512  # NOTE(review): not referenced anywhere else in the visible notebook
bs = 32
val_bs = bs*2  # validation batches are twice as large as training batches

n_epoch = 4
lr = 2e-5
opt_func = Adam
diff_lr_decay_factor = 0  # 0 disables the per-layer-group learning-rate `slice` in the single-run cell
GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
def validate_task(task_name=None):
    """Check that a task name is one of `GLUE_TASKS`.

    Args:
        task_name: Name to check. When omitted, the notebook-level global
            `task` is checked, so the existing `validate_task()` calls keep
            working.

    Raises:
        ValueError: If the name is not listed in `GLUE_TASKS`.
    """
    name = task if task_name is None else task_name
    # Raise explicitly instead of using `assert`, which is stripped under `python -O`
    if name not in GLUE_TASKS:
        raise ValueError(f"Unknown GLUE task {name!r}; expected one of {GLUE_TASKS}")
from fastai.metrics import MatthewsCorrCoef, F1Score, PearsonCorrCoef, SpearmanCorrCoef
# Metrics computed for each GLUE task. The first entry of each list is the one
# the training cells pick as `metric_to_monitor`.
# NOTE(review): 'mnli-mm' is listed in GLUE_TASKS but has no entry here.
glue_metrics = {
    'cola':[MatthewsCorrCoef()],
    'sst2':[accuracy],
    'mrpc':[F1Score(), accuracy],
    'stsb':[PearsonCorrCoef(), SpearmanCorrCoef()],
    'qqp' :[F1Score(), accuracy],
    'mnli':[accuracy],
    'qnli':[accuracy],
    'rte' :[accuracy],
    'wnli':[accuracy],
}

# Text column name(s) per task, unpacked into `TextGetter`. `None` in the
# second slot marks single-sentence tasks and is skipped when text lengths
# are summed.
glue_textfields = {
    'cola':['sentence', None],
    'sst2':['sentence', None],
    'mrpc':['sentence1', 'sentence2'],
    'stsb':['sentence1', 'sentence2'],
    'qqp' :['question1', 'question2'],
    'mnli':['premise', 'hypothesis'],
    'qnli':['question', 'sentence'],
    'rte' :['sentence1', 'sentence2'],
    'wnli':['sentence1', 'sentence2'],
}

# Label counts for the tasks that differ from the default of 2 used by
# `glue_num_labels.get(task, 2)`.
glue_num_labels = {'mnli':3, 'stsb':1}
def layerwise_splitter(model):
    "Split `model` into parameter groups: embeddings, each encoder layer, then the remaining top-level modules"
    backbone = model.base_model
    embedding_group = L(backbone.embeddings)
    encoder_groups = L(backbone.encoder.layer.children())
    # Everything after the first top-level child, keeping only modules that own parameters
    head_groups = L(child for child in list(model.children())[1:] if params(child))
    return (embedding_group + encoder_groups + head_groups).map(params)

Running a GLUE task

task = 'sst2'
validate_task()  # checks the global `task` assigned on the previous line
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
# MNLI has no plain 'validation' split; its matched split is named with an
# underscore ('validation_matched'), as used in the MultiNLI section below.
valid_ = 'validation_matched' if task=='mnli' else 'validation'
len(ds['train']), len(ds[valid_])
(67349, 872)
# presumably `get_splits` returns row indices that match the concatenation
# order used below (train rows first, then validation rows) — confirm against
# fasthugs.data.get_splits
train_idx, valid_idx = get_splits(ds, valid=valid_)
# A single dataset holding both splits; `IndexSplitter(valid_idx)` separates them again later
train_ds = concatenate_datasets([ds['train'], ds[valid_]])
train_ds[0]
{'idx': 0,
 'label': 0,
 'sentence': 'hide new secretions from the parental units '}

Here I use the number of characters as a proxy for the length of the tokenized text, to speed up `dls` creation.

def _char_len(sample):
    "Total number of characters over the text fields of `sample` for the current `task`"
    text_cols = [col for col in glue_textfields[task] if col]
    return {'len': sum(len(sample[col]) for col in text_cols)}

# Replace every original column with the single 'len' column
lens = train_ds.map(_char_len, remove_columns=train_ds.column_names,
                    num_proc=2, keep_in_memory=True)
train_lens = lens.select(train_idx)['len']
valid_lens = lens.select(valid_idx)['len']

# Text input block configured with `model_name`; targets are categorical labels
x_block = TransformersTextBlock(pretrained_model_name=model_name)
text_cols = glue_textfields[task]
dblock = DataBlock(
    blocks=[x_block, CategoryBlock()],
    get_x=TextGetter(*text_cols),
    get_y=ItemGetter('label'),
    splitter=IndexSplitter(valid_idx),
)
%%time
# Hand the precomputed character lengths to the train / valid dataloaders.
# presumably `res` / `val_res` are consumed as per-item lengths by the
# underlying dataloader — confirm against fasthugs / fastai
dl_kwargs=[{'res':train_lens}, {'val_res':valid_lens}]
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs, dl_kwargs=dl_kwargs)
CPU times: user 16.9 s, sys: 992 ms, total: 17.9 s
Wall time: 17.8 s
dls.show_batch(max_n=4)
text category
0 ... spiced with humor ('i speak fluent flatula,'advises denlopp after a rather, er, bubbly exchange with an alien deckhand ) and witty updatings ( silver's parrot has been replaced with morph, a cute alien creature who mimics everyone and everything around ) 1
1 stopped thinking about how good it all was, and started doing nothing but reacting to it - feeling a part of its grand locations, thinking urgently as the protagonists struggled, feeling at the mercy of its inventiveness, gasping at its visual delights. 1
2 ozpetek offers an aids subtext, skims over the realities of gay sex, and presents yet another tired old vision of the gay community as an all-inclusive world where uptight, middle class bores like antonia can feel good about themselves. 0
3 's... worth the extra effort to see an artist, still committed to growth in his ninth decade, change while remaining true to his principles with a film whose very subject is, quite pointedly, about the peril of such efforts. 1

Single run

# Run metadata shown in the W&B UI
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
if diff_lr_decay_factor: GROUP += f"diff_lr_{diff_lr_decay_factor}"
NOTES = f'finetuning {model_name} with {opt_func.__name__} lr={lr:.0e}'
TAGS =[model_name, ds_name, opt_func.__name__]
# NOTE(review): this run logs to project "fasthugs" while the rest of the
# notebook uses "glue-benchmark" — confirm this is intentional
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS);
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=glue_num_labels.get(task, 2))
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics, opt_func=opt_func, splitter=layerwise_splitter).to_fp16()

# Keep the scalar `lr` setting untouched: overwriting it with a `slice` would
# break the `{lr:.0e}` formatting above when this cell is re-run.
fit_lr = lr
if diff_lr_decay_factor != 0:
    # One decay step per parameter group returned by the splitter
    k = len(layerwise_splitter(model))
    fit_lr = slice(lr*diff_lr_decay_factor**k, lr)

# The first metric of the task is the one used to pick the best checkpoint
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__
cbs = [WandbCallback(log_preds=False, log_model=False), SaveModelCallback(monitor=metric_to_monitor)]
# Use the shared `n_epoch` setting instead of a hard-coded epoch count
learn.fit_one_cycle(n_epoch, fit_lr, cbs=cbs)
Could not gather input dimensions
epoch train_loss valid_loss accuracy time
0 0.234400 0.234248 0.915138 03:09
1 0.173527 0.247280 0.917431 03:11
2 0.097726 0.246916 0.924312 03:10
3 0.078707 0.263630 0.925459 03:11
Better model found at epoch 0 with accuracy value: 0.9151375889778137.
Better model found at epoch 1 with accuracy value: 0.9174311757087708.
Better model found at epoch 2 with accuracy value: 0.9243119359016418.
Better model found at epoch 3 with accuracy value: 0.9254587292671204.
learn.show_results()
text category category_
0 the movie has an infectious exuberance that will engage anyone with a passing interest in the skate/surf culture, the l.a. beach scene and the imaginative ( and sometimes illegal ) ways kids can make a playground out of the refuse of adults. 1 1
1 what really makes it special is that it pulls us into its world, gives us a hero whose suffering and triumphs we can share, surrounds him with interesting characters and sends us out of the theater feeling we've shared a great adventure. 1 1
2 this is a train wreck of an action film -- a stupefying attempt by the filmmakers to force-feed james bond into the mindless xxx mold and throw 40 years of cinematic history down the toilet in favor of bright flashes and loud bangs. 0 0
3 it's one of those baseball pictures where the hero is stoic, the wife is patient, the kids are as cute as all get-out and the odds against success are long enough to intimidate, but short enough to make a dream seem possible. 1 1
4 though perry and hurley make inspiring efforts to breathe life into the disjointed, haphazard script by jay scherick and david ronn, neither the actors nor director reginald hudlin can make it more than fitfully entertaining. 0 1
5 may be far from the best of the series, but it's assured, wonderfully respectful of its past and thrilling enough to make it abundantly clear that this movie phenomenon has once again reinvented itself for a new generation. 1 1
6 despite all evidence to the contrary, this clunker has somehow managed to pose as an actual feature movie, the kind that charges full admission and gets hyped on tv and purports to amuse small children and ostensible adults. 0 0
7 it's inoffensive, cheerful, built to inspire the young people, set to an unending soundtrack of beach party pop numbers and aside from its remarkable camerawork and awesome scenery, it's about as exciting as a sunburn. 0 1
8 but the power of these ( subjects ) is obscured by the majority of the film that shows a stationary camera on a subject that could be mistaken for giving a public oration, rather than contributing to a film's narrative. 0 0
# preds = learn.get_preds(dl=test_dl)
# Drop the learner and ask PyTorch to release its cached GPU memory before
# the sweep section builds new models
del learn
gc.collect()
torch.cuda.empty_cache()

Sweeps

wandb.login()
Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
wandb: Currently logged in as: fastai_community (use `wandb login --relogin` to force relogin)
True
def train():
    """Run one W&B sweep trial for the current global `task`.

    Reads `lr`, `wd` and `diff_lr_decay_factor` from the run config, builds a
    fresh `model_name` classifier, trains it on the global `dls` for `n_epoch`
    epochs and logs through `WandbCallback`. The learner and model are
    released afterwards, also when training raises, so that the next trial
    starts with the GPU cache emptied.
    """
    with wandb.init() as run:
        cfg = run.config
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=glue_num_labels.get(task, 2))
        metrics = glue_metrics[task]
        # A non-zero decay factor turns the scalar learning rate into a
        # `slice` spanning the parameter groups; it has to be the value that
        # is actually passed to `fit_one_cycle` to have any effect.
        lr = cfg.lr
        if cfg.diff_lr_decay_factor:
            k = len(layerwise_splitter(model))
            lr = slice(cfg.lr*cfg.diff_lr_decay_factor**k, cfg.lr)
        learn = TransLearner(dls, model, metrics=metrics, opt_func=Adam, splitter=layerwise_splitter).to_fp16()
        try:
            learn.fit_one_cycle(n_epoch, lr, wd=cfg.wd, cbs=[WandbCallback(log_preds=False, log_model=False)])
        finally:
            # Drop every local reference to the model before emptying the
            # cache, otherwise its parameters are still alive at this point
            del learn, model
            gc.collect()
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
metrics = glue_metrics[task]
# The sweep optimizes the first metric defined for the task
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__
sweep_name = f"glue-{task}-sweep"
sweep_config = {
    "project":"glue-benchmark",
    # Same entity as the one passed to `wandb.sweep` below
    "entity": "fastai_community",
    "name": sweep_name,
    "method": "random",
    "parameters": {
        "lr": {"values":[1e-5,2e-5,3e-5,5e-5, 1e-4, 3e-4]},
        "wd": {"values":[0.,1e-2,5e-2]},
        # 0. means a single learning rate for all parameter groups
        "diff_lr_decay_factor":{"values":[0., 0.9, 0.8, 0.7, 0.6]}
    },
    # W&B spells the goal "maximize" / "minimize"
    "metric":{"goal": "maximize", "name": metric_to_monitor},
    "early_terminate": {"type": "hyperband", "s": 2, "eta": 3, "max_iter": 40}
}
sweep_id = wandb.sweep(sweep_config, project='glue-benchmark', entity="fastai_community")
Create sweep with ID: dwio5fl2
Sweep URL: https://wandb.ai/fastai_community/uncategorized/sweeps/dwio5fl2
# Run sweep trials in this process, calling `train` once per trial.
# NOTE(review): no `count` is passed — presumably the agent keeps requesting
# trials until the sweep is stopped; confirm against the W&B agent docs
wandb.agent(sweep_id, function=train)
wandb.finish()

Another task example: MultiNLI

task = 'mnli'
validate_task()  # checks the global `task` assigned on the previous line
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
# The matched validation split is used for validation during training; the
# mismatched one is evaluated separately at the end of the notebook
train_idx, valid_idx = get_splits(ds, valid='validation_matched')
train_ds = concatenate_datasets([ds['train'], ds['validation_matched']])
train_ds[0]
{'hypothesis': 'Product and geography are what make cream skimming work. ',
 'idx': 0,
 'label': 1,
 'premise': 'Conceptually cream skimming has two basic dimensions - product and geography.'}
def _pair_len(sample):
    "Combined character count of the premise and hypothesis of `sample`"
    return {'len': len(sample['premise']) + len(sample['hypothesis'])}

# Replace every original column with the single 'len' column
lens = train_ds.map(
    _pair_len,
    remove_columns=train_ds.column_names,
    num_proc=4,
    keep_in_memory=True,
)
train_lens = lens.select(train_idx)['len']
valid_lens = lens.select(valid_idx)['len']



# Text input block configured with `model_name`; targets are categorical labels
x_block = TransformersTextBlock(pretrained_model_name=model_name)
dblock = DataBlock(
    blocks=[x_block, CategoryBlock()],
    get_x=TextGetter('premise', 'hypothesis'),
    get_y=ItemGetter('label'),
    splitter=IndexSplitter(valid_idx),
)
%%time
# Hand the precomputed character lengths to the train / valid dataloaders.
# presumably `res` / `val_res` are consumed as per-item lengths by the
# underlying dataloader — confirm against fasthugs / fastai
dl_kwargs=[{'res':train_lens}, {'val_res':valid_lens}]
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs, dl_kwargs=dl_kwargs, num_workers=4)
CPU times: user 1min 25s, sys: 1.62 s, total: 1min 27s
Wall time: 1min 27s
dls.show_batch(max_n=4)
text text_ category
0 well uh that's kind of obvious i mean they're even carrying it to to where now uh that they advertise on TV you know if your if you uh you know have done this or if you need this uh uh we'll sue for you and you don't have to pay us unless you but then what they don't tell you is that if you if they win you give them at least a third of the of the thing that they win so i don't know it is uh it's getting to be more business now rather than uh actually uh dealing with the crime than with uh um the uh punishment they the the lawyers are just in it for the money i'm i'm convinced i know i i agree with you i think you're real you're very right that the politicians should i think they I think that there should be an equal representation of backgrounds in our politicians. 0
1 i think it's come along very slowly i think that uh a lot of the stresses that you talk about have have have taken a long time to become recognized but more strides are made toward addressing those stresses in women than ever were in men and i think that you know women have had stress you know for the last two hundred years but it's been a different kind of stress different kinds of situations because just being in the work place is different from being at home but at home you still have work to do you still have things to take care of but you don't have the responsibilities and the pressures that you have in the work place so a lot of women have a little bit of trouble adjusting to that but i feel like the in in the last couple of decades the the There will still be many women who strive to have a full time career but some seem to be choosing to stay home with their kids because daycare has become so expensive. 1
2 and uh essentially the cafeteria plans are programs where the company makes available a a different uh uh different packages of benefits you you know you can pick the kind of insurance you want and the kind of dental coverage and the kind of vacation policy you want an and all of these different things and you pick the ones that you want as long as the total cost to the company essentially the total cost to the company fits you know within their guidelines so every employee gets the same dollar amount of benefits but they can pick them from different whatever is most important to them The company has something called 'cafeteria plans,' where you are given a selection of different benefit packages including insurance, dental, and vacation plans and you can choose any combination between them as long as it comes out to the same total cost to the company for every employee. 0
3 it doesn't take much but uh we got him all back going and packed up and came on back uh let's see i i had another another camp out that wasn't uh near as bad in fact it had some really good good points to it i'm trying to think of exactly where we were uh we were on another lake i don't remember the the name of it but we were right up at the shore and the way it was layed out the camp grounds were uh it was another state park and the campgrounds had railroad ties blocking off a sandy pit and that made for a fairly level place to put the tent but to get down to the beach you had to walk down a real steep embankment i mean it was so steep they had put railroad ties as stair steps every once in The one camp site we went to was not too bad, I remember the beach being close to the camp site. 0
# Hyperparameters for this MNLI run (override the notebook-level `lr`)
lr=5e-5
wd=0.01
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
# NOTE(review): no `opt_func` is passed to TransLearner below, so the notes
# and tags assume its default optimizer is Adam — confirm
NOTES = f'finetuning {model_name} with Adam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'adam']
wandb.init(reinit=True, project="glue-benchmark", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS);
# Look the label count up in the shared table (3 for MNLI) instead of hard-coding it
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=glue_num_labels.get(task, 2))
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
# Only needed when the SaveModelCallback line below is enabled
metric_to_monitor = metrics[0].name if isinstance(metrics[0], Metric) else metrics[0].__name__
cbs = [WandbCallback(log_preds=False, log_model=False)]
# cbs += [SaveModelCallback(monitor=metric_to_monitor)]
# Use the shared `n_epoch` setting instead of a hard-coded epoch count
learn.fit_one_cycle(n_epoch, lr, wd=wd, cbs=cbs)
Could not gather input dimensions
epoch train_loss valid_loss accuracy time
0 0.522656 0.507707 0.800102 28:19
1 0.453350 0.465771 0.819766 28:21
2 0.346266 0.462031 0.837290 28:20
3 0.230743 0.500686 0.843097 28:24
learn.show_results()
text text_ category category_
0 yes they would they just wouldn't be able to own the kind of automobiles that they think they deserve to own or the kind of homes that we think we deserve to own we might have to you know just be able to i think if we a generation went without debt then the next generation like if if our our generation my husband and i we're twenty eight if we lived our lives and didn't become you know indebted like you know our generation before us that um the budget would balance and that we became accustomed to living with what we could afford which we wouldn't be destitute i mean we wouldn't be living on the street by any means but just compared to how spoiled we are we would be in our own minds but i feel like the generation after us would oh man it it Society would be perfect and there would be no more war if we could just rid ourselves of our debt. 1 2
1 yes they would they just wouldn't be able to own the kind of automobiles that they think they deserve to own or the kind of homes that we think we deserve to own we might have to you know just be able to i think if we a generation went without debt then the next generation like if if our our generation my husband and i we're twenty eight if we lived our lives and didn't become you know indebted like you know our generation before us that um the budget would balance and that we became accustomed to living with what we could afford which we wouldn't be destitute i mean we wouldn't be living on the street by any means but just compared to how spoiled we are we would be in our own minds but i feel like the generation after us would oh man it it I am glad our generation has no debt. 2 2
2 The rule prohibits the sale of nicotine-containing cigarettes and smokeless tobacco to individuals under the age of 18; requires manufacturers, distributors, and retailers to comply with various conditions regarding the sale and distribution of these products; requires retailers to verify a purchaser's age by photographic identification; prohibits all free samples; limits the distribution of these products through vending machines and self-service displays by permitting such methods of sale only in facilities where access by individuals under 18 is prohibited; limits the advertising and labeling to which children and adolescents are exposed; prohibits promotional, non-tobacco items such as hats and tee shirts; prohibits sponsorship of This rule will make the sale of tobacco products to people under 18 years old legal in every state and Mexico. 2 2
3 The rule prohibits the sale of nicotine-containing cigarettes and smokeless tobacco to individuals under the age of 18; requires manufacturers, distributors, and retailers to comply with various conditions regarding the sale and distribution of these products; requires retailers to verify a purchaser's age by photographic identification; prohibits all free samples; limits the distribution of these products through vending machines and self-service displays by permitting such methods of sale only in facilities where access by individuals under 18 is prohibited; limits the advertising and labeling to which children and adolescents are exposed; prohibits promotional, non-tobacco items such as hats and tee shirts; prohibits sponsorship of The rule will not allow for cigarettes that contain nicotine to be sold to anyone under 18 years of age. 0 0
4 yeah because you look at the statistics now and i'm sure it's in your your newspapers just like it is in ours that every major city now the increase of crime is is escalating i mean there are more look at the look at the people there are being shot now i mean every day there's there's dozens of dozens of people across the nation they just get blown away for no reason you know stray bullets or California they were going out there and they were shooting and they get these guys and they don't do anything with them so i kind of i kind of agree with you i'm kind of you still in the in the uh prison system "Crime is escalating now in every major city." 0 0
5 i know because i think i've been reading i read this ten years ago that they were having these big uh um rallies and people would be in the streets flashing signs statehood yes and other people would statehood down the statehood it's it down there if you're um familiar with their politics they uh it's very uh i i don't know it's called Latino there they have loudspeakers on their cars and they run down the neighborhood saying vote for you know Pierre he's or uh Pedro uh Pedro he's the best it's it's really kind of comical Ten years ago, they rallies on streets with flashing signs and loudspeakers advertising voting candidates. 0 0
6 Of how, when tea was done, and everyone had stood,He reached for my head, put his hands over it,And gently pulled me to his chest, which smelledOf dung smoke and cinnamon and mutton grease.I could hear his wheezy breathing now, like the prophet's Last whispered word repeated by the faithful.Then he prayed for what no one had time to translate--His son interrupted the old man to tell him a groupOf snake charmers sought his blessing, and a blind thief.The saint pushed me away, took one long look,Then straightened my collar and nodded me toward the door. When tea was done, he put his hands on me. 0 2
7 right right well you know i think uh i think it's going to happen i don't know i don't know what else i could suggest to them you know if they ask me what should we do i don't know i wouldn't know what else to suggest to them just education start with these little kids you know and like you said you know start making it practice you know start showing all the street signs and all the cars of course i think all the cars are manufactured that way they aren't aren't all of them most o f the new ones i'm seeing are are made with miles per hour and kilometers on them Education will make it happen. 1 1
8 In addition to the arguments previously advanced by the Vice Presidentas representatives and addressed in our June 22 letter to the Counsel to the Vice President (see Enclosure 1), the Vice Presidentas August 2 letter to the Congress asserts that the study is not authorized by statute because GAO is limited to looking at the aresults- of programs and that GAO does not have a right of access to documents because the Vice President is not included under the term aagency- used in GAOas statute. The Vice President's representatives went further and demanded an apology from the GAO. 1 1
# Evaluate on the mismatched MNLI validation split, which was not part of
# the data `dls` was built from (train + validation_matched)
valid_mm_dl = dls.test_dl(ds['validation_mismatched'], with_labels=True)
learn.validate(dl=valid_mm_dl)
(#2) [0.4921019375324249,0.842351496219635]