I have the following script where I'm trying to fine-tune DistilBERT. It seems to train decently fast, but when I run predictions with the model, the results are all over the place. I'm pretty new to Python and ML, so it's been hard to debug and figure out what's happening.
import tensorflow as tf
from datasets import load_dataset
import numpy as np
from transformers import DistilBertTokenizer, TFAutoModelForSequenceClassification, pipeline, create_optimizer, DataCollatorWithPadding
# Module-level tokenizer shared by train() (to tokenize the dataset and pad
# batches) and predict() (handed to the inference pipeline).
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
def train():
    """Fine-tune ``distilbert-base-uncased`` as a two-label classifier and save it.

    Loads ``full-items.json`` with ``load_dataset``, tokenizes the ``text``
    field of every record, trains the model on the ``train`` split, and writes
    the result to the ``lease_to_own_model`` directory.

    Label ids are mapped as ``0 -> "NEGATIVE"`` and ``1 -> "POSITIVE"``.
    """
    # Single source of truth for the training hyperparameters. The learning
    # rate schedule, the batching and the fit loop must all agree on these;
    # previously the schedule was sized for 5 epochs while fit() ran only 3,
    # so training stopped before the learning rate had finished decaying.
    batch_size = 16
    num_epochs = 5

    def preprocess_function(examples):
        """Tokenize a batch of records, truncating over-long texts."""
        return tokenizer(examples["text"], truncation=True)

    dataset = load_dataset('json', data_files='full-items.json')
    tokenized = dataset.map(preprocess_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

    # Size the decay schedule to the number of optimizer steps fit() will
    # actually take: whole batches per epoch times the number of epochs.
    batches_per_epoch = len(tokenized["train"]) // batch_size
    total_train_steps = int(batches_per_epoch * num_epochs)
    # create_optimizer also returns the schedule object; only the optimizer
    # is needed here.
    optimizer, _ = create_optimizer(
        init_lr=2e-5, num_warmup_steps=0, num_train_steps=total_train_steps
    )

    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {"NEGATIVE": 0, "POSITIVE": 1}
    model = TFAutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
    )

    tf_train_set = model.prepare_tf_dataset(
        tokenized["train"],
        shuffle=True,
        batch_size=batch_size,
        collate_fn=data_collator,
    )

    # No explicit loss is passed to compile(), exactly as in the original call.
    model.compile(optimizer=optimizer, metrics="accuracy")
    model.fit(x=tf_train_set, epochs=num_epochs)
    model.save_pretrained('lease_to_own_model', save_format="tf")
def predict(text):
    """Classify *text* with the model stored in ``lease_to_own_model``.

    The saved model is loaded on every call and wrapped, together with the
    module-level ``tokenizer``, in a ``text-classification`` pipeline.

    Returns:
        Whatever the pipeline returns for *text*.
    """
    classifier = pipeline(
        "text-classification",
        model=TFAutoModelForSequenceClassification.from_pretrained(
            'lease_to_own_model'
        ),
        tokenizer=tokenizer,
    )
    return classifier(text)
My JSON file looks like this:
[
{ "text": "tv", "label": 1 },
{ "text": "gun", "label": 0 },
]
source https://stackoverflow.com/questions/74868539/hugging-face-distilbert-base-uncased-not-predicting-well
Comments
Post a Comment