from transformers import AutoModelForSequenceClassification
from fastai.text.all import *
from fastai.callback.wandb import *

from fasthugs.learner import TransLearner
from fasthugs.data import TransformersTextBlock, TextGetter, get_splits, PreprocCategoryBlock

from datasets import load_dataset, concatenate_datasets

Setup

Let's define the main settings for the run in one place:

ds_name = 'glue'
model_name = "distilroberta-base"

max_len = 512
bs = 32
val_bs = bs*2

lr = 3e-5
GLUE_TASKS = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
def validate_task():
    assert task in GLUE_TASKS
from fastai.metrics import MatthewsCorrCoef, F1Score, PearsonCorrCoef, SpearmanCorrCoef
glue_metrics = {
    'cola':[MatthewsCorrCoef()],
    'sst2':[accuracy],
    'mrpc':[F1Score(), accuracy],
    'stsb':[PearsonCorrCoef(), SpearmanCorrCoef()],
    'qqp' :[F1Score(), accuracy],
    'mnli':[accuracy],
    'qnli':[accuracy],
    'rte' :[accuracy],
    'wnli':[accuracy],
}
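
Each task stores its inputs under different column names, so the DataBlock in each section below picks the right fields explicitly. For reference, the fields used in this notebook are summarised here (just an illustrative mapping, nothing fasthugs requires):

# Illustrative only: dataset columns holding the model inputs for each task.
task_text_fields = {
    'cola': ('sentence', None),
    'sst2': ('sentence', None),
    'mrpc': ('sentence1', 'sentence2'),
    'stsb': ('sentence1', 'sentence2'),
    'qqp' : ('question1', 'question2'),
    'mnli': ('premise', 'hypothesis'),
    'qnli': ('question', 'sentence'),
    'rte' : ('sentence1', 'sentence2'),
    'wnli': ('sentence1', 'sentence2'),
}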

CoLA

task = 'cola'
validate_task()
ds = load_dataset(ds_name, task)
ds.keys()
dict_keys(['train', 'validation', 'test'])
len(ds['train']), len(ds['validation'])
(8551, 1043)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#1043) [8551,8552,8553,8554,8555,8556,8557,8558,8559,8560...]
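
Judging by this output, get_splits simply returns index ranges covering the train and validation parts of the soon-to-be concatenated dataset, so that IndexSplitter can pull them apart again. A minimal stand-in (an assumption about its behaviour, not the fasthugs source) would look like:

# Sketch of what get_splits appears to do (assumption based on the output above).
def get_splits_sketch(ds, train='train', valid='validation'):
    n_train, n_valid = len(ds[train]), len(ds[valid])
    return list(range(n_train)), list(range(n_train, n_train + n_valid))
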
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 1,
 'sentence': "Our friends won't buy this analysis, let alone the next one we propose."}
vocab = train_ds.features['label'].names
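# TransformersTextBlock tokenizes with the checkpoint's tokenizer; PreprocCategoryBlock (judging by show_batch below) maps the integer labels to the names in vocab.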
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=ItemGetter('sentence'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 6.27 s, sys: 1.95 s, total: 8.22 s
Wall time: 8.24 s
dls.show_batch(max_n=4)
text category
0 Everybody who has ever, worked in any office which contained any typewriter which had ever been used to type any letters which had to be signed by any administrator who ever worked in any department like mine will know what I mean. acceptable
1 In January 2002, a dull star in an obscure constellation suddenly became 600,000 times more luminous than our Sun, temporarily making it the brightest star in our galaxy. acceptable
2 Playing with matches is; lots of fun, but doing, so and emptying gasoline from one can to another at the same time is a sport best reserved for arsons. acceptable
3 All native speakers have a grammatical competence which can generate an infinite set of grammatical sentences from a finite set of resources. acceptable
import wandb

WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
CONFIG = {}
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = [WandbCallback(log_preds=False, log_model=False), SaveModelCallback(monitor=metrics[0].name)]
learn.fit_one_cycle(4, lr, cbs=cbs)
Could not gather input dimensions
epoch train_loss valid_loss matthews_corrcoef time
0 0.524564 0.520301 0.401543 00:29
1 0.393098 0.426386 0.570214 00:29
2 0.264803 0.507379 0.541898 00:30
3 0.166178 0.571510 0.581578 00:30
Better model found at epoch 0 with matthews_corrcoef value: 0.40154309805886357.
Better model found at epoch 1 with matthews_corrcoef value: 0.570213836374093.
Better model found at epoch 3 with matthews_corrcoef value: 0.5815775806078913.
learn.show_results()
text category category_
0 Scientists at the South Hanoi Institute of Technology have succeeded in raising one dog with five legs, another with a cow's liver, and a third with no head. acceptable acceptable
1 The newspaper has reported that they are about to appoint someone, but I can't remember who the newspaper has reported that they are about to appoint. acceptable acceptable
2 Sandy is very anxious to see if the students will be able to solve the homework problem in a particular way, but she won't tell us in which way. acceptable acceptable
3 Sandy is very anxious to see if the students will be able to solve the homework problem in a particular way, but she won't tell us which. acceptable acceptable
4 As a teacher, you have to deal simultaneously with the administration's pressure on you to succeed, and the children's to be a nice guy. unacceptable unacceptable
5 Put a picture of Bill on your desk before tomorrow, this girl in the red coat will put a picture of Bill on your desk before tomorrow. unacceptable acceptable
6 Clinton is anxious to find out which budget dilemmas Panetta would be willing to tackle in a certain way, but he won't say in which. acceptable acceptable
7 I live at the place where Route 150 crosses the River and my dad lives at the place where Route 150 crosses the Hudson River too. acceptable acceptable
8 Harry told Sue that Albania is a lovely place for a vacation, and Tom told Sally that Albania is a lovely place for a vacation. acceptable acceptable
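
If you also want predictions for the unlabelled GLUE test split (e.g. for a leaderboard submission), the standard fastai inference API should work here as well. A sketch, assuming TransLearner behaves like a regular Learner:

# Sketch: predict on the unlabelled test split (assumes standard Learner.get_preds behaviour).
test_dl = dls.test_dl(ds['test'])
preds, _ = learn.get_preds(dl=test_dl)
pred_labels = [vocab[i] for i in preds.argmax(dim=-1).tolist()]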

SST-2

task = 'sst2'
validate_task()
ds = load_dataset(ds_name, task)
Downloading and preparing dataset glue/sst2 (download: 7.09 MiB, generated: 4.81 MiB, post-processed: Unknown size, total: 11.90 MiB) to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...
Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.
ds.keys()
dict_keys(['train', 'validation', 'test'])
len(ds['train']), len(ds['validation'])
(67349, 872)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#872) [67349,67350,67351,67352,67353,67354,67355,67356,67357,67358...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 0,
 'sentence': 'hide new secretions from the parental units '}
vocab = train_ds.features['label'].names
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=ItemGetter('sentence'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 32.3 s, sys: 268 ms, total: 32.6 s
Wall time: 32.4 s
dls.show_batch(max_n=4)
text category
0 ... spiced with humor ('i speak fluent flatula,'advises denlopp after a rather, er, bubbly exchange with an alien deckhand ) and witty updatings ( silver's parrot has been replaced with morph, a cute alien creature who mimics everyone and everything around ) positive
1 chronicle not only of one man's quest to be president, but of how that man single-handedly turned a plane full of hard-bitten, cynical journalists into what was essentially, by campaign's end, an extended publicity department. positive
2 the fascination comes in the power of the huston performance, which seems so larger than life and yet so fragile, and in the way the ivan character accepts the news of his illness so quickly but still finds himself unable to react positive
3 the bourne identity shouldn't be half as entertaining as it is, but director doug liman and his colleagues have managed to pack it with enough action to satisfy the boom-bam crowd without a huge sacrifice of character and mood. positive
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);

Training

model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = [WandbCallback(log_preds=False, log_model=False), SaveModelCallback(monitor=metrics[0].__name__)]
learn.fit_one_cycle(4, lr, cbs=cbs)
Could not gather input dimensions
epoch train_loss valid_loss accuracy time
0 0.233458 0.233393 0.912844 04:10
1 0.154877 0.256017 0.918578 04:10
2 0.115099 0.250559 0.922018 04:10
3 0.064575 0.286238 0.924312 04:11
Better model found at epoch 0 with accuracy value: 0.9128440618515015.
Better model found at epoch 1 with accuracy value: 0.9185779690742493.
Better model found at epoch 2 with accuracy value: 0.9220183491706848.
Better model found at epoch 3 with accuracy value: 0.9243119359016418.
learn.show_results()
text category category_
0 the movie has an infectious exuberance that will engage anyone with a passing interest in the skate/surf culture, the l.a. beach scene and the imaginative ( and sometimes illegal ) ways kids can make a playground out of the refuse of adults. positive positive
1 what really makes it special is that it pulls us into its world, gives us a hero whose suffering and triumphs we can share, surrounds him with interesting characters and sends us out of the theater feeling we've shared a great adventure. positive positive
2 this is a train wreck of an action film -- a stupefying attempt by the filmmakers to force-feed james bond into the mindless xxx mold and throw 40 years of cinematic history down the toilet in favor of bright flashes and loud bangs. negative negative
3 it's one of those baseball pictures where the hero is stoic, the wife is patient, the kids are as cute as all get-out and the odds against success are long enough to intimidate, but short enough to make a dream seem possible. positive positive
4 though perry and hurley make inspiring efforts to breathe life into the disjointed, haphazard script by jay scherick and david ronn, neither the actors nor director reginald hudlin can make it more than fitfully entertaining. negative positive
5 despite all evidence to the contrary, this clunker has somehow managed to pose as an actual feature movie, the kind that charges full admission and gets hyped on tv and purports to amuse small children and ostensible adults. negative negative
6 may be far from the best of the series, but it's assured, wonderfully respectful of its past and thrilling enough to make it abundantly clear that this movie phenomenon has once again reinvented itself for a new generation. positive positive
7 it's inoffensive, cheerful, built to inspire the young people, set to an unending soundtrack of beach party pop numbers and aside from its remarkable camerawork and awesome scenery, it's about as exciting as a sunburn. negative positive
8 but the power of these ( subjects ) is obscured by the majority of the film that shows a stationary camera on a subject that could be mistaken for giving a public oration, rather than contributing to a film's narrative. negative negative

Microsoft Research Paraphrase Corpus

task = 'mrpc'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(3668, 408)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#408) [3668,3669,3670,3671,3672,3673,3674,3675,3676,3677...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 1,
 'sentence1': 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .',
 'sentence2': 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .'}
vocab = train_ds.features['label'].names
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=TextGetter('sentence1', 'sentence2'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
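
MRPC is the first sentence-pair task here, so get_x switches from ItemGetter to TextGetter, which pulls both fields so the tokenizer sees them as a text pair. Roughly, it can be thought of as something like this (an assumption, not the fasthugs implementation):

# Rough stand-in for TextGetter (assumption): return the requested fields as a tuple.
class TextGetterSketch:
    def __init__(self, *fields): self.fields = fields
    def __call__(self, item): return tuple(item[f] for f in self.fields)
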
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 2.19 s, sys: 35.2 ms, total: 2.23 s
Wall time: 2.2 s
dls.show_batch(max_n=4)
text text_ category
0 Amrozi accused his brother, whom he called " the witness ", of deliberately distorting his evidence. Referring to him as only " the witness ", Amrozi accused his brother of deliberately distorting his evidence. equivalent
1 He said the situation undermines efforts to win international co-operation in the war on terror. He said the way the situation was being handled was undermining efforts to win international cooperation in the war on terror. equivalent
2 A day earlier, a committee appointed by reformist President Mohammad Khatami called for an independent judicial inquiry into Kazemi's death. A day earlier, a committee appointed by President Mohammad Khatami had called for an independent inquiry into the 54-year-old photojournalist's death. equivalent
3 The U.S. Centers for Disease Control and Prevention has been gathering information on suspect cases. Nationally, the federal Centers for Disease Control and Prevention recorded 4,156 cases of West Nile, including 284 deaths. not_equivalent
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);

Training

model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = [WandbCallback(log_preds=False, log_model=False), SaveModelCallback(monitor=metrics[0].name)]
learn.fit_one_cycle(4, lr, cbs=cbs)
Could not gather input dimensions
epoch train_loss valid_loss f1_score accuracy time
0 0.625647 0.574429 0.800000 0.688725 00:34
1 0.457276 0.377830 0.848126 0.811275 00:34
2 0.282977 0.327592 0.895944 0.855392 00:35
3 0.190531 0.392248 0.886598 0.838235 00:35
Better model found at epoch 0 with f1_score value: 0.8.
Better model found at epoch 1 with f1_score value: 0.8481262327416172.
Better model found at epoch 2 with f1_score value: 0.8959435626102292.
learn.show_results()
text text_ category category_
0 He said the foodservice pie business doesn 't fit the company's long-term growth strategy. " The foodservice pie business does not fit our long-term growth strategy. equivalent equivalent
1 Sales for the quarter beat expectations, rising 37 percent year-on-year to 1.76 billion euros. Sales rose 37 per cent year-on-year to 1.76bn, beating expectations. equivalent equivalent
2 Q : Can I buy coverage for prescription drugs right away? Congress has added a new benefit - an option to buy insurance coverage for prescription drugs. not_equivalent not_equivalent
3 Saddam loyalists have been blamed for sabotaging the nation's infrastructure, as well as frequent attacks on U.S. soldiers. Hussein loyalists have been blamed for sabotaging the nation's infrastructure and attacking US soldiers. equivalent equivalent
4 The delegates said raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations, bank accounts of terror-related organizations and money transfers. Bin Laden ’ s men pointed out that raising and distributing funds has been complicated by the U.S. crackdown on jihadi charitable foundations, bank accounts of terror-related organizations and money transfers. equivalent equivalent
5 " The government elements who have been causing trouble are still in place. The government elements who have been causing trouble are still in place, they are attacking us. " not_equivalent not_equivalent
6 " More than 70,000 men and women from bases in Southern California were deployed in Iraq. In all, more than 70,000 troops based in Southern California were deployed to Iraq. equivalent equivalent
7 Available July 7, the software supports the Solaris, IBM AIX, Red Hat Linux and Windows operating systems. The OpForce product currently works with Solaris, AIX, Red Hat Linux and Windows servers. not_equivalent not_equivalent
8 By skirting the FDA's oversight, Eagan said, the quality of the imported drugs is " less predictable " than for those obtained in the United States. By skirting the FDA's oversight, Eagan said the quality of the imported drugs is " less predictable " than U.S. drugs. equivalent equivalent

Semantic Textual Similarity Benchmark

task = 'stsb'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(5749, 1500)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#1500) [5749,5750,5751,5752,5753,5754,5755,5756,5757,5758...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 5.0,
 'sentence1': 'A plane is taking off.',
 'sentence2': 'An air plane is taking off.'}
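# STS-B is a regression task: labels are similarity scores in [0, 5], hence RegressionBlock here and num_labels=1 for the model below.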
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), RegressionBlock(1)],
                   get_x=TextGetter('sentence1', 'sentence2'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 4.31 s, sys: 38.1 ms, total: 4.35 s
Wall time: 4.33 s
dls.show_batch(max_n=4)
text text_ text__
0 A plane is taking off. An air plane is taking off. 5.0
1 Top judge Mansour sworn in as Egypt interim president Justice Adly Mansour sworn in as Egypt interim president 4.400000095367432
2 Matt Smith to leave Doctor Who after 4 years Matt Smith quits BBC’s Doctor Who 4.0
3 A man is paddling a canoe. A man is playing a harp. 0.6669999957084656
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss pearsonr spearmanr time
0 0.921977 0.757547 0.840027 0.842902 00:40
1 0.621509 0.574162 0.865038 0.863698 00:39
2 0.436710 0.580115 0.869754 0.870622 00:39
3 0.311906 0.571886 0.874994 0.874831 00:40
learn.show_results()
text text_ text__
0 A man with a hard hat is dancing.A man wearing a hard hat is dancing. 5.0 (4.900470733642578,)
1 A monkey is karate kicking a person.A monkey pratices martial arts. 2.5 (3.0790014266967773,)
2 The udders of a dairy cow that is standing in a pasture near a large building.The black and brown cow is standing on the green grass. 2.0 (1.802596092224121,)
3 No, you don't need to have taken classes or earned a degree in your area.I have done quite a bit of technical writing for healthcare information systems. 0.0 (0.5809511542320251,)
4 Suicide bomber kills 4 near NATO's Afghan HQSuicide bomber kills 23 at Afghan wedding 1.600000023841858 (1.5624017715454102,)
5 Crohn's disease causes inflammation of the intestine and symptoms include diarrhea, pain, weight loss and tiredness.Symptoms include chronic diarrhoea, abdominal pain, weight loss and extreme tiredness. 3.0 (3.196563720703125,)
6 According to the 2000 Census, Long Beach's Hispanic or Latino population was listed at 35.8 percent.According to the Census Bureau, the Hispanic population increased by 9.8 percent from the April 2000 census figures. 2.4000000953674316 (1.9710969924926758,)
7 Two dogs in the snowTwo dogs play in the grass. 2.4000000953674316 (1.7369558811187744,)
8 A basketball player makes the 5th rebound of the night,A tennis player in the middle of a game. 1.399999976158142 (0.4190220236778259,)

Quora Question Pairs

task = 'qqp'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/qqp/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(363846, 40430)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#40430) [363846,363847,363848,363849,363850,363851,363852,363853,363854,363855...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 0,
 'question1': 'How is the life of a math student? Could you describe your own experiences?',
 'question2': 'Which level of prepration is enough for the exam jlpt5?'}
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=TextGetter('question1', 'question2'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 4min 40s, sys: 607 ms, total: 4min 41s
Wall time: 4min 41s
dls.show_batch(max_n=4)
text text_ category
0 How is the life of a math student? Could you describe your own experiences? Which level of prepration is enough for the exam jlpt5? 0
1 What is the best way to approach random women stranger in public as on streets? What is the best way to approach women I see in public and ask them on a date? 1
2 Are there breakfast cereals in the Middle East and Israel? What would happen to the Middle East if Israel and Iran became allies? 0
3 What is the maximum size possible for a rocky planet? Is there a limit to the size of rocky planets? 1
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss f1_score accuracy time
0 0.291785 0.277536 0.836042 0.879743 39:15
1 0.240468 0.244921 0.859694 0.895820 39:19
2 0.188160 0.233590 0.871434 0.906035 39:18
3 0.132292 0.257508 0.877380 0.909053 39:08
learn.show_results()
text category category_
0 Why are African-Americans so beautiful?Why are hispanics so beautiful? 0 0
1 What is the effect of a curved penis on sex?Sex: How do I put my penis in a vagina? 0 0
2 Why is Manaphy bipolar in Pokémon ranger and the Temple of the sea?In Pokemon Ranger and The Temple of The Sea, Why is Manaphy angsty? 1 1
3 What are some ways South Korea can develop its economy?How did South Korea become a developed nation? 1 1
4 How can I advertise to a list of Twitter usernames?How do I export Twitter lists to a text file? 0 0
5 What is sliding round in ipu and its process?What is the eligibility of sliding round in ipu? 0 1
6 How do I stop being desperate for a relationship?How can I stop being so desperate to have a girlfriend in India? 1 0
7 How can I write persuasive love letter that will make any girl fall in love after reading it?After rejection from a girl can I make her fall in love with me? 0 0
8 Which are the best TV series that you have ever watched and why?What are the best TV series you ever watched and why? 1 1

MultiNLI

task = 'mnli'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
ds.keys()
dict_keys(['train', 'validation_matched', 'validation_mismatched', 'test_matched', 'test_mismatched'])
train_idx, valid_idx = get_splits(ds, valid='validation_matched')
valid_idx
(#9815) [392702,392703,392704,392705,392706,392707,392708,392709,392710,392711...]
train_ds = concatenate_datasets([ds['train'], ds['validation_matched']])
train_ds[0]
{'hypothesis': 'Product and geography are what make cream skimming work. ',
 'idx': 0,
 'label': 1,
 'premise': 'Conceptually cream skimming has two basic dimensions - product and geography.'}
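# Pre-compute example lengths so the dataloaders can sort roughly by length (passed below as res/val_res, presumably consumed by SortedDL) and reduce padding.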
lens = train_ds.map(lambda s: {'len': len(s['premise'])+len(s['hypothesis'])}, remove_columns=train_ds.column_names, num_proc=4, keep_in_memory=True)
train_lens = lens.select(train_idx)['len']
valid_lens = lens.select(valid_idx)['len']



dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name),
                             CategoryBlock(vocab={0:'entailment', 1:'neutral', 2:'contradiction'})],
                   get_x=TextGetter('premise', 'hypothesis'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dl_kwargs=[{'res':train_lens}, {'val_res':valid_lens}]
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs, dl_kwargs=dl_kwargs, num_workers=4)
CPU times: user 55.9 s, sys: 528 ms, total: 56.5 s
Wall time: 56.4 s
dls.show_batch(max_n=4)
text text_ category
0 well uh that's kind of obvious i mean they're even carrying it to to where now uh that they advertise on TV you know if your if you uh you know have done this or if you need this uh uh we'll sue for you and you don't have to pay us unless you but then what they don't tell you is that if you if they win you give them at least a third of the of the thing that they win so i don't know it is uh it's getting to be more business now rather than uh actually uh dealing with the crime than with uh um the uh punishment they the the lawyers are just in it for the money i'm i'm convinced i know i i agree with you i think you're real you're very right that the politicians should i think they I think that there should be an equal representation of backgrounds in our politicians. 0
1 and yeah oh yeah this is this is um well yeah up here in in in Rochester winter usually doesn't you know spring comes about this time March or April and um this was sort of unexpected it had actually been pretty fairly nice you know in the in the forties and um it had even been in the uh in the in the low fifties a couple of days in a row and then they actually they predicted it they said well we're going to have an ice storm coming up and then no one knew how bad it was going to be and it came in and in one night it basically destroyed it destroyed approximately one third of all the vegetation in um in like the three county area by us and about half the city owned trees in the city of Rochester are destroyed um we We haven't had any big storms at all. 2
2 i think on the local elections it's actually um i think um i think people vote in the larger elections because they feel like well you know the country's real important and you know if i elect if we help elect a president who's going to save us then the then the whole country will be saved but they figure local elections don't mean that much i suspect so people don't really worry about their local mayor or something figuring that you know um and i think wrongly figuring that that well what good is another mayor going to do anyway because our federal taxes are more important right now and you know and stuff like that so people think that that that they don't have um that that it doesn't doesn't matter as much whereas i think it's probably just the opposite is true i think it's probably the I think people should only vote for the presidential elections. 2
3 Add the shrill vocals of former Sex Pistol John Lydon, and the result is Leftfield's hectoring. Drench a track in shadowy atmosphere, and the result is the ominous soundscape of Tricky's. Combine '50s space music, French pop balladeering, and Marxist catch phrases, and the result is such Stereolab confections as. Transpose the cyclical rhythms of dance music (and work by minimalist composers like Steve Reich) to guitars, and the result is the hypnotic disorientation of My Bloody Valentine's. Include reggae rhythms and quotations from Indian and Arabic music, and the result is the ethno-techno of Loop Guru tracks like. Mixing music genres together seldom produces anything of value. 2
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss accuracy time
0 0.532167 0.516440 0.797860 29:07
1 0.431111 0.452060 0.829852 29:02
2 0.345421 0.453751 0.841671 29:00
3 0.252346 0.486783 0.841773 29:02
learn.show_results()
text category category_
0 yes they would they just wouldn't be able to own the kind of automobiles that they think they deserve to own or the kind of homes that we think we deserve to own we might have to you know just be able to i think if we a generation went without debt then the next generation like if if our our generation my husband and i we're twenty eight if we lived our lives and didn't become you know indebted like you know our generation before us that um the budget would balance and that we became accustomed to living with what we could afford which we wouldn't be destitute i mean we wouldn't be living on the street by any means but just compared to how spoiled we are we would be in our own minds but i feel like the generation after us would oh man it it 1 2
1 and i look back on that and i bought shoes i went shopping i did not need that money i did not need it i didn't need it i shouldn't have even qualified to get it i didn't need it and it would have been a little rough i might have eaten some bologna instead of roast beef out of the deli but i did not need it and as i look back now now we're paying that back i told my son if you have to live in the ghetto to go to college do it but don't take out ten thousand dollars in loans don't do it and i don't i hope don't think he'll have to do that but i just so like we might if we didn't have those loans we could have saved in the last five years the money for that and i believe 0 0
2 well the first thing for me is i wonder i see a couple of different ways of talking about what privacy is um if privacy is something that disturbs your private state i mean an invasion of privacy is something that disturbs your private state that's one thing and if privacy is something that comes into your private state and extracts information from it in other words finds something out about you that's another and the first kind of invasion of the first type of privacy seems invaded to me in very much everyday in this country but in the second type at least overtly uh where someone comes in and uh finds out information about you that should be private uh does not seem uh um obviously everydayAll invasions of privacy should be severely punished, because it will teach the criminals that it is not worth doing. 1 1
3 yeah because you look at the statistics now and i'm sure it's in your your newspapers just like it is in ours that every major city now the increase of crime is is escalating i mean there are more look at the look at the people there are being shot now i mean every day there's there's dozens of dozens of people across the nation they just get blown away for no reason you know stray bullets or California they were going out there and they were shooting and they get these guys and they don't do anything with them so i kind of i kind of agree with you i'm kind of you still in the in the uh prison system"Crime is escalating now in every major city." 0 0
4 i know that you know the further we go from Adam the worse the food is for you but God still somehow makes us all be able to still live i think it's a miracle we're all still alive after so many generations well the last couple of processed foods you know i mean but i don't know i like to i like to my i like to be able to eat really healthy you know what am saying and i guess i'm going to have to wait for the millennium i think though because i do don't think we're going to restore the earth to you know i think Jesus is the only one that can make this earth be restored to what it should beI like to be able to eat real healthy. 0 0
5 Of how, when tea was done, and everyone had stood,He reached for my head, put his hands over it,And gently pulled me to his chest, which smelledOf dung smoke and cinnamon and mutton grease.I could hear his wheezy breathing now, like the prophet's Last whispered word repeated by the faithful.Then he prayed for what no one had time to translate--His son interrupted the old man to tell him a groupOf snake charmers sought his blessing, and a blind thief.The saint pushed me away, took one long look,Then straightened my collar and nodded me toward the door.When tea was done, he put his hands on me romantically. 1 2
6 In addition to the arguments previously advanced by the Vice Presidentas representatives and addressed in our June 22 letter to the Counsel to the Vice President (see Enclosure 1), the Vice Presidentas August 2 letter to the Congress asserts that the study is not authorized by statute because GAO is limited to looking at the aresults- of programs and that GAO does not have a right of access to documents because the Vice President is not included under the term aagency- used in GAOas statute.The Vice President's representatives went further and demanded an apology from the GAO. 1 1
7 In addition to the arguments previously advanced by the Vice Presidentas representatives and addressed in our June 22 letter to the Counsel to the Vice President (see Enclosure 1), the Vice Presidentas August 2 letter to the Congress asserts that the study is not authorized by statute because GAO is limited to looking at the aresults- of programs and that GAO does not have a right of access to documents because the Vice President is not included under the term aagency- used in GAOas statute.Congress received no letter from the Vice President on the topic of GAO's study. 2 2
8 i know because i think i've been reading i read this ten years ago that they were having these big uh um rallies and people would be in the streets flashing signs statehood yes and other people would statehood down the statehood it's it down there if you're um familiar with their politics they uh it's very uh i i don't know it's called Latino there they have loudspeakers on their cars and they run down the neighborhood saying vote for you know Pierre he's or uh Pedro uh Pedro he's the best it's it's really kind of comicalIf I was there, I would have voted for Pedro. 1 0
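# The MNLI-MM ("mismatched") score is just the same model evaluated on the mismatched validation split.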
valid_mm_dl = dls.test_dl(ds['validation_mismatched'], with_labels=True)
learn.validate(dl=valid_mm_dl)
(#2) [0.4746566414833069,0.8428600430488586]

Question NLI

task = 'qnli'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/qnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(104743, 5463)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#5463) [104743,104744,104745,104746,104747,104748,104749,104750,104751,104752...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 1,
 'question': 'When did the third Digimon series begin?',
 'sentence': 'Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese.'}
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=TextGetter('question', 'sentence'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 1min 17s, sys: 324 ms, total: 1min 17s
Wall time: 1min 17s
dls.show_batch(max_n=4)
text text_ category
0 When did the third Digimon series begin? Unlike the two seasons before it and most of the seasons that followed, Digimon Tamers takes a darker and more realistic approach to its story featuring Digimon who do not reincarnate after their deaths and more complex character development in the original Japanese. 1
1 Steamships and railroads rose with what else? The rise of port cities saw the clustering of populations caused by the development of steamships and railroads. 0
2 What causes Streptococci? Nonetheless, the dramatic decrease in deaths from infectious diseases that occurred prior to World War II was primarily the result of improved public health measures such as clean water and less crowded housing, and the impact of anti-infective drugs and vaccines was significant mainly after World War II. 1
3 What kind of ceiling is in the chapter house? A pier of eight shafts carries the vaulted ceiling. 0
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss accuracy time
0 0.325645 0.272470 0.892367 12:54
1 0.275354 0.239894 0.901153 12:59
2 0.182944 0.243231 0.910672 12:59
3 0.121718 0.269116 0.909573 13:00
learn.show_results()
text category category_
0 What came into force after the new constitution was herald?As of that day, the new constitution heralding the Second Republic came into force. 0 0
1 How many campuses does the California State University have?Amongst these include 5 University of California campuses (Irvine, Los Angeles, Riverside, Santa Barbara, and San Diego); 12 California State University campuses (Bakersfield, Channel Islands, Dominguez Hills, Fullerton, Los Angeles, Long Beach, Northridge, Pomona, San Bernardino, San Diego, San Marcos, and San Luis Obispo); and private institutions such as the California Institute of Technology, Chapman University, the Claremont Colleges (Claremont McKenna College, Harvey Mudd College, Pitzer College, Pomona College, and Scripps College), Loma Linda University, Loyola Marymount University, Occidental College, Pepperdine University, University of Redlands, University of San Diego, and the University of Southern California. 0 0
2 What service did ABC launch in May 2013?New York City O&O WABC-TV and Philadelphia O&O WPVI-TV were the first stations to offer streams of their programming on the service (with a free preview for non-subscribers through June 2013), with the six remaining ABC O&Os offering streams by the start of the 2013–14 season. 1 1
3 What did the Public Health Cigarette Smoking Act ban?In April 1970, Congress passed the Public Health Cigarette Smoking Act which banned cigarette advertising from all television and radio networks, including ABC, when it took effect on January 2, 1971. 0 0
4 What city did Tesla move to in 1880?Unfortunately, he arrived too late to enroll at Charles-Ferdinand University; he never studied Greek, a required subject; and he was illiterate in Czech, another required subject. 1 1
5 What was the ideal duty of a Newcomen engine?The historical measure of a steam engine's energy efficiency was its "duty". 1 1
6 What does UMC stand for?The United Methodist Church (UMC) is a mainline Protestant Methodist denomination. 0 0
7 What characteristic of oxygen causes it to form bonds with other elements?Due to its electronegativity, oxygen forms chemical bonds with almost all other elements to give corresponding oxides. 0 0
8 How many degrees south did the Amazon rainforest reach from 66-34 Mya?Climate fluctuations during the last 34 million years have allowed savanna regions to expand into the tropics. 1 1

Recognizing Textual Entailment

task = 'rte'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(2490, 277)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#277) [2490,2491,2492,2493,2494,2495,2496,2497,2498,2499...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 1,
 'sentence1': 'No Weapons of Mass Destruction Found in Iraq Yet.',
 'sentence2': 'Weapons of Mass Destruction Found in Iraq.'}
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=TextGetter('sentence1', 'sentence2'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 1.93 s, sys: 19.9 ms, total: 1.95 s
Wall time: 1.94 s
dls.show_batch(max_n=4)
text text_ category
0 No Weapons of Mass Destruction Found in Iraq Yet. Weapons of Mass Destruction Found in Iraq. 1
1 ISLAMABAD, Pakistan — The uneasy truce between the Pakistani government and Taliban militants in the Swat Valley appeared increasingly fragile on Monday as government forces attacked militants in a neighboring district for a second day, causing the main negotiator for the Taliban to break off talks. Maulana Sufi Muhammad, the pro-Taliban cleric who has been negotiating peace talks between the government and the Taliban in Swat, halted talks on Monday to protest the military operation in the Lower Dir district west of Swat, his spokesman said. Maulana Sufi Muhammad is the chief of Tehrik Nifaz Shariat-e-Muhammadi. 1
2 Australia was one of the countries in the U.S.-led coalition that invaded Iraq in 2003. The coalition is coordinated by the U.S 0
3 Protesters, many from organized pro-government groups but including many ordinary citizens, carried anti-American banners and chanted slogans attacking U.N. Secretary-General Kofi Annan for his close alignment with U.S. policy. Protesters confiscated anti-American banners and chanted slogans attacking U.N. Secretary-General Kofi Annan for his close alignment with U.S. policy. 1
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss accuracy time
0 0.695495 0.690176 0.552347 00:30
1 0.686033 0.673962 0.588448 00:30
2 0.600213 0.598385 0.689531 00:31
3 0.489225 0.628444 0.685921 00:31
learn.show_results()
text category category_
0 Dana Reeve, the widow of the actor Christopher Reeve, has died of lung cancer at age 44, according to the Christopher Reeve Foundation.Christopher Reeve had an accident. 1 0
1 A plane crashed in North Carolina last year and most of the Blink-182 group was in there. The only survivors of the crash were DJ AM and musical partner Travis Barker. Less lucky, "Lil" Chris Baker, Barker's close friend and assistant, the Blink-182 drummer's bodyguard, Charles "Che" Still, the pilot and co-pilot died. Both AM and Barker suffered many injures, bad burns and they needed to stay in hospital for several weeks. Now AM asks $20 million in compensation for injuries he suffered. He lawsuits against plane maker Learjet and wants to gain $10 million for medical damages, lost earnings and profit and the same amount for mental and physical suffering.Travis Barker belongs to a band. 0 0
2 An American journalist of Russian descent, Klebnikov was shot four times by at least one assassin in a passing car as he stepped outside his office that evening in Moscow.Vladislav Listyev was murdered in Moscow. 1 1
3 Recent satellite pictures of Yellowstone National Park in Wyoming from 2004 through 2006 are showing that an ancient volcano is starting to rise once again. Molten rock is currently pushing up the remains of the volcano's caldera, which sits over the top of Yellowstone lake, but scientists are stressing that there is no immediate threat of an eruption or explosion. The molten rock field is estimated to be the size of the city of Los Angeles, California.There is a volcano in Yellowstone. 0 0
4 A cataclysmic starquake is thought to have caused a flare of radiation that ripped past the Earth on December 27, battering instruments on satellites and lighting up our atmosphere.The flash of radiation on December 27, lit up the Earth's atmosphere. 0 0
5 He also referred to the "illegal" arrest on 31 May of Mexican Professor Maria Eugenia Ochoa Garcia, whom the Salvadoran government accused of having connections with the Salvadoran guerrillas.Professor Ochoa Garcia is a member of the Salvadoran government. 1 0
6 Texas Data Recovery is also successful at retrieving lost data from notebooks and laptops, regardless of age, make or model.In the event of a disaster you could use Texas Data Recovery and you will have the capability to restore lost data. 0 1
7 Known as "heap leach" mining, the method has become popular in the last decade because it enables microscopic bits of gold to be economically extracted from low-grade ore.The mining industry uses a method known as heap leaching. 0 0
8 The extension of the effective period of marketing exclusivity for drugs is designed to give pharmaceutical companies a fair return.Prospective drugs must have long market life after regulatory approval in order to be developed. 1 1

Winograd NLI

task = 'wnli'
validate_task()
ds = load_dataset(ds_name, task)
Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)
len(ds['train']), len(ds['validation'])
(635, 71)
train_idx, valid_idx = get_splits(ds)
valid_idx
(#71) [635,636,637,638,639,640,641,642,643,644...]
train_ds = concatenate_datasets([ds['train'], ds['validation']])
train_ds[0]
{'idx': 0,
 'label': 1,
 'sentence1': 'I stuck a pin through a carrot. When I pulled the pin out, it had a hole.',
 'sentence2': 'The carrot had a hole.'}
dblock = DataBlock(blocks = [TransformersTextBlock(pretrained_model_name=model_name), PreprocCategoryBlock(vocab)],
                   get_x=TextGetter('sentence1', 'sentence2'),
                   get_y=ItemGetter('label'),
                   splitter=IndexSplitter(valid_idx))
%%time
dls = dblock.dataloaders(train_ds, bs=bs, val_bs=val_bs)
CPU times: user 492 ms, sys: 43 µs, total: 492 ms
Wall time: 488 ms
dls.show_batch(max_n=4)
text text_ category
0 I stuck a pin through a carrot. When I pulled the pin out, it had a hole. The carrot had a hole. 1
1 In the storm, the tree fell down and crashed through the roof of my house. Now, I have to get it removed. Now I have to get The tree removed. 1
2 Since Chester was dependent on Uncle Vernon, he couldn't very well marry without his approval He couldn't very well marry without Chester's approval 0
3 At the Loebner competition the judges couldn't figure out which respondents were the chatbots because they were so advanced. The judges were so advanced. 0
WANDB_NAME = f'{ds_name}-{task}-{model_name}'
GROUP = f'{ds_name}-{task}-{model_name}-{lr:.0e}'
NOTES = f'finetuning {model_name} with RAdam lr={lr:.0e}'
TAGS =[model_name, ds_name, 'radam']
wandb.init(reinit=True, project="fasthugs", entity="fastai_community",
           name=WANDB_NAME, group=GROUP, notes=NOTES, tags=TAGS, config=CONFIG);
model = AutoModelForSequenceClassification.from_pretrained(model_name)
metrics = glue_metrics[task]
learn = TransLearner(dls, model, metrics=metrics).to_fp16()
cbs = []
learn.fit_one_cycle(4, lr, cbs=cbs)
epoch train_loss valid_loss accuracy time
0 0.701348 0.686603 0.563380 00:04
1 0.696426 0.693483 0.436620 00:04
2 0.695261 0.692141 0.563380 00:04
3 0.695158 0.693270 0.563380 00:04
learn.show_results()
text category category_
0 The drain is clogged with hair. It has to be cleaned.The hair has to be cleaned. 0 0
1 Jane gave Joan candy because she was hungry.Jane was hungry. 0 0
2 I tried to paint a picture of an orchard, with lemons in the lemon trees, but they came out looking more like light bulbs.The lemon trees came out looking more like light bulbs. 0 0
3 There is a pillar between me and the stage, and I can't see around it.I can't see around the pillar. 1 0
4 The cat was lying by the mouse hole waiting for the mouse, but it was too cautious.The cat was too cautious. 0 0
5 Emma did not pass the ball to Janie although she was open.She saw that Emma was open. 0 0
6 Grant worked hard to harvest his beans so he and his family would have enough to eat that winter, His friend Henry let him stack them in his barn where they would dry. Later, he and Tatyana would shell them and cook them for their Sunday dinners.Later, he and Tatyana would shell them and cook them for the beans' Sunday dinners. 0 0
7 Sam's drawing was hung just above Tina's and it did look much better with another one below it.Tina's drawing did look much better with another one below it. 0 0
8 Always before, Larry had helped Dad with his work. But he could not help him now, for Dad said that his boss at the railroad company would not want anyone but him to work in the office.He could not help Larry now. 0 0