Added training loop for the MTL architecture on the original distribution

This commit is contained in:
2026-02-23 16:26:48 +00:00
parent 7bd68108d0
commit 4f0c54fe28
8 changed files with 174 additions and 37 deletions

2
.gitignore vendored
View File

@@ -8,3 +8,5 @@ models/
.ipynb_checkpoints/ .ipynb_checkpoints/
*.csv *.csv
backup/*.csv backup/*.csv
runs/
outputs/

View File

@@ -51,7 +51,8 @@ class ReviewDataset(Dataset):
if __name__ == "__main__": if __name__ == "__main__":
dataset = ReviewDataset("data/processed/original_train.csv", AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")) dataset = ReviewDataset("data/processed/original_train.csv", AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base"))
print(dataset.__getitem__(1)) # print(dataset.__getitem__(1))

View File

@@ -1,69 +1,203 @@
# train.py # train.py
# some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html
from datetime import datetime
import torch import torch
import random
from sklearn.utils.class_weight import compute_class_weight from sklearn.utils.class_weight import compute_class_weight
import numpy as np import numpy as np
import torch.nn as nn import torch.nn as nn
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from transformers import AutoTokenizer from transformers import AutoTokenizer
import pandas as pd import pandas as pd
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import classification_report, f1_score
from dataset import ReviewDataset from dataset import ReviewDataset
from model import Model from model import Model
# Single seed shared by torch, NumPy and Python's `random` so that runs are
# repeatable (CUDA seeding is handled separately inside main()).
SEED = 4321
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
# Maximum number of passes over the training set; also used to size the
# learning-rate schedule.
EPOCHS = 5
# Number of consecutive epochs without an improvement in the averaged
# validation macro-F1 that triggers early stopping.
PATIENCE = 3
# class weights, training loop and early stopping # class weights, training loop and early stopping
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
train = "data/processed/original_train.csv"
val = "data/processed/original_val.csv"
train_dataset = ReviewDataset(train, tokenizer)
val_dataset = ReviewDataset(val, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
model = Model().to(device)
# move input_ids, attention_mask and labels to device in each batch
# ------------------- Class weights ------------------- # ------------------- Class weights -------------------
# Using weights inversely proportional to class frequencies to avoid majority class bias, # Using weights inversely proportional to class frequencies to avoid majority class bias,
# prioritize useful bug reports / feature requests # prioritize useful bug reports / feature requests
def compute_weights(df, column, device):
    """Return balanced class weights for one label column as a float tensor.

    Args:
        df: DataFrame holding the training labels.
        column: Name of the label column to weight.
        device: Torch device the resulting tensor is moved to.

    Returns:
        A 1-D ``torch.float`` tensor with one weight per distinct value in
        ``df[column]``, ordered by the sorted unique label values.
    """
    labels = df[column]
    # NOTE(review): the weights are positional, so using them with
    # nn.CrossEntropyLoss presumably relies on labels being encoded as
    # 0..K-1 with every class present in the training split -- confirm.
    balanced = compute_class_weight(
        class_weight='balanced',
        classes=np.unique(labels),
        y=labels,
    )
    weight_tensor = torch.tensor(balanced, dtype=torch.float)
    return weight_tensor.to(device)
def _train_one_epoch(model, loader, criterions, optimizer, scheduler, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    The per-batch loss is the unweighted sum of the per-task losses in
    ``criterions``. The scheduler is stepped once per batch.
    """
    model.train()
    num_batches = len(loader)
    running_loss = 0.0
    for step, batch in enumerate(loader):
        optimizer.zero_grad()
        # forward pass: one set of logits per task head
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        outputs = model(input_ids, attention_mask)
        # total loss = sum of the task losses
        loss = 0
        for task, criterion in criterions.items():
            labels = batch[task].to(device)
            loss += criterion(outputs[task], labels)
        loss.backward()
        # clip gradients to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        batch_loss = loss.item()
        # Accumulate across batches so the epoch average is meaningful.
        running_loss += batch_loss
        if step % 50 == 0:
            print(f" Batch {step}/{num_batches} - Loss: {batch_loss:.4f}")
    return running_loss / num_batches


def _evaluate(model, loader, criterions, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        ``(avg_loss, all_labels, all_preds)`` where ``avg_loss`` is the mean
        over batches of the summed task losses, and the two dicts map each
        task name to the list of gold labels / argmax predictions.
    """
    model.eval()
    all_preds = {task: [] for task in criterions}
    all_labels = {task: [] for task in criterions}
    # Initialised once, outside the batch loop, so every batch contributes.
    running_loss = 0.0
    with torch.no_grad():
        for batch in loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            outputs = model(input_ids, attention_mask)
            for task, criterion in criterions.items():
                labels = batch[task].to(device)
                running_loss += criterion(outputs[task], labels).item()
                preds = torch.argmax(outputs[task], dim=1).cpu().numpy()
                all_preds[task].extend(preds)
                all_labels[task].extend(labels.cpu().numpy())
    return running_loss / len(loader), all_labels, all_preds


def main():
    """Train the multi-task model with class-weighted losses and early stopping.

    Loads the train/validation CSVs, builds one class-weighted
    ``CrossEntropyLoss`` per task, trains with AdamW and a linear warmup
    schedule for at most ``EPOCHS`` epochs, logs losses and macro-F1 scores to
    TensorBoard, saves the weights with the best averaged validation macro-F1,
    and stops once ``PATIENCE`` consecutive epochs bring no improvement.
    """
    # Local import: only needed here, to create the checkpoint directory.
    import os

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    # Remove randomness
    if torch.cuda.is_available():
        print("GPU:", torch.cuda.get_device_name(0))
        torch.cuda.manual_seed_all(SEED)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
    train = "data/processed/original_train.csv"
    val = "data/processed/original_val.csv"
    train_dataset = ReviewDataset(train, tokenizer)
    val_dataset = ReviewDataset(val, tokenizer)
    training_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    validation_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
    model = Model().to(device)
    train_df = pd.read_csv(train)

    # ------------------- Class weights -------------------
    bug_weights = compute_weights(train_df, 'bug_report', device)
    feature_weights = compute_weights(train_df, 'feature_request', device)
    aspect_weights = compute_weights(train_df, 'aspect', device)
    aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device)
    # Move tensors to the CPU and convert to NumPy so they print cleanly.
    print("Bug report class weights:", bug_weights.cpu().numpy())
    print("Feature request class weights:", feature_weights.cpu().numpy())
    print("Aspect class weights:", aspect_weights.cpu().numpy())
    print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy())

    # -------------------- Loss functions -------------------
    # All task losses are currently summed with equal weight.
    # TODO: idea for later -- weight them as
    #   1.0 * bug_loss + 1.0 * feature_loss + 0.5 * aspect_loss + 0.5 * sentiment_loss
    criterions = {
        'bug_report': nn.CrossEntropyLoss(weight=bug_weights),
        'feature_request': nn.CrossEntropyLoss(weight=feature_weights),
        'aspect': nn.CrossEntropyLoss(weight=aspect_weights),
        'aspect_sentiment': nn.CrossEntropyLoss(weight=aspect_sentiment_weights)
    }

    # -------------------- Optimizer and scheduler -------------------
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=2e-5,  # change
        weight_decay=0.01
    )
    total_steps = len(training_loader) * EPOCHS
    warmup_steps = int(0.1 * total_steps)  # 10% of steps for warmup
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_steps
    )

    # ------------------- Training loop -------------------
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # NOTE(review): the run name looks copied from the PyTorch tutorial;
    # kept unchanged so existing TensorBoard directories stay comparable.
    writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
    # NOTE(review): "best_mode.pt" is presumably a typo for "best_model.pt";
    # kept as-is in case other scripts load this exact path -- confirm.
    checkpoint_path = "outputs/best_mode.pt"
    # outputs/ is git-ignored, so it will not exist on a fresh checkout.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    best_f1 = 0.0
    patience_counter = 0
    try:
        for epoch in range(EPOCHS):
            print(f"EPOCH {epoch + 1}")
            avg_train_loss = _train_one_epoch(
                model, training_loader, criterions, optimizer, scheduler, device
            )
            # `epoch` is the TensorBoard step so each epoch gets its own point.
            writer.add_scalar("Loss/train", avg_train_loss, epoch)
            print(f"Average training loss: {avg_train_loss:.4f}")

            avg_vloss, all_labels, all_preds = _evaluate(
                model, validation_loader, criterions, device
            )
            writer.add_scalar("Loss/val", avg_vloss, epoch)

            print("\nValidation Metrics:")
            epoch_f1 = []
            for task in criterions:
                task_f1 = f1_score(all_labels[task], all_preds[task], average='macro')
                epoch_f1.append(task_f1)
                writer.add_scalar(f"F1/val_{task}", task_f1, epoch)
                print(f" {task} Macro F1: {task_f1:.4f}")
            avg_macro_f1 = np.mean(epoch_f1)
            writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch)
            print(f" Average Macro F1: {avg_macro_f1:.4f}")

            # ------------------- Stopping logic -------------------
            # Track the mean of the per-task macro-F1 scores; stop when it
            # has not improved for PATIENCE consecutive epochs.
            if avg_macro_f1 > best_f1:
                best_f1 = avg_macro_f1
                patience_counter = 0
                torch.save(model.state_dict(), checkpoint_path)
                print(" New best model saved.")
            else:
                patience_counter += 1
                print(f" No improvement. Patience counter: {patience_counter}/{PATIENCE}")
                if patience_counter >= PATIENCE:
                    print(" Early stopping triggered.")
                    break
    finally:
        # Flush and close the event file even if training raises.
        writer.close()
    print("Training complete.")
# Run training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()