Fixed a few issues with performance data collection and debugging output. MTL training is ready; moving on to single-task training for comparison in the write-up.

This commit is contained in:
2026-02-26 17:40:37 +00:00
parent df6aec7165
commit 01e2142276

View File

@@ -1,30 +1,31 @@
# train.py
# some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html
import argparse # argparse for later switching to boosted data
import os
from datetime import datetime
import torch
import random
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
import pandas as pd
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import classification_report, f1_score
from sklearn.utils.class_weight import compute_class_weight
from dataset import ReviewDataset
from model import Model
# NFR5, reproducibility
SEED = 4321
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
EPOCHS = 5
PATIENCE = 3
# class weights, training loop and early stopping
# ------------------- Class weights -------------------
@@ -35,41 +36,60 @@ def compute_weights(df, column, device):
weights = compute_class_weight(class_weight='balanced', classes=classes, y=df[column])
return torch.tensor(weights, dtype=torch.float).to(device)
# parse_args() - NFR7 and NFR9
# Example Usages: python src/train.py --dataset boosted
# python src/train.py --epochs 15 NOTE: 8 - 12 epochs has seen best results so far
def parse_args(argv=None):
    """Parse the command-line options for a training run (NFR7, NFR9).

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. The default ``None`` keeps the original
            behaviour of reading ``sys.argv[1:]``, so existing callers are
            unaffected; passing a list makes the parser usable from tests
            and notebooks.

    Returns:
        argparse.Namespace with the attributes ``dataset``, ``batch_size``,
        ``epochs``, ``patience`` and ``lr``.

    Raises:
        SystemExit: raised by argparse on invalid arguments (for example a
            ``--dataset`` value outside the allowed choices) or on ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="RECLASS, Multitask learning for review classification."
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="original",
        choices=["original", "boosted"],
        help="Choose between 'original' and 'boosted' dataset.",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=16,
        help="Keep to 16 or 8 for 8GB VRAM",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=5,
        help="Maximum training epochs.",
    )
    parser.add_argument(
        "--patience",
        type=int,
        default=3,
        help="Patience for early stopping.",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=2e-5,
        help="Learning rate",
    )
    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
def main():
args = parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Starting training...", flush=True)
print("Using device:", device)
# Remove randomness
if torch.cuda.is_available():
print("GPU:", torch.cuda.get_device_name(0))
torch.cuda.manual_seed_all(SEED)
torch.cuda.manual_seed(SEED)
print(f"Using dataset: {args.dataset.upper()}")
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
"""
Data loading:
"""
train = f"data/processed/{args.dataset}_train.csv"
val = f"data/processed/{args.dataset}_val.csv"
os.makedirs("outputs", exist_ok=True)
os.makedirs("runs", exist_ok=True)
# FR1, FR2, Multilingual tokenizer initialization
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
train = "data/processed/original_train.csv"
val = "data/processed/original_val.csv"
train_dataset = ReviewDataset(train, tokenizer)
val_dataset = ReviewDataset(val, tokenizer)
training_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
training_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
# FR3, shared multilingual model with task-specific heads
model = Model().to(device)
train_df = pd.read_csv(train)
# move input_ids, attention_mask and labels to device in each batch
# weights
# Class weights
print("\n Computing class weights...")
bug_weights = compute_weights(train_df, 'bug_report', device)
feature_weights = compute_weights(train_df, 'feature_request', device)
aspect_weights = compute_weights(train_df, 'aspect', device)
aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device)
# Move tensors to CPU and convert to numpy for usage with sklearn classification report
# Use detach() later for predictions
print("Bug report class weights:", bug_weights.cpu().numpy())
@@ -77,13 +97,12 @@ def main():
print("Aspect class weights:", aspect_weights.cpu().numpy())
print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy())
# -------------------- Loss Functions -------------------
# for later
# 1.0 * bug_loss +
# 1.0 * feature_loss +
# 0.5 * aspect_loss +
# 0.5 * sentiment_loss
# FR4-FR7: Task specific loss functions
criterions = {
'bug_report': nn.CrossEntropyLoss(weight=bug_weights),
'feature_request': nn.CrossEntropyLoss(weight=feature_weights),
@@ -94,11 +113,11 @@ def main():
# -------------------- Optimizer and scheduler -------------------
optimizer = torch.optim.AdamW(
model.parameters(),
lr=2e-5, # change
lr=args.lr, # change
weight_decay=0.01
)
total_steps = len(training_loader) * EPOCHS
total_steps = len(training_loader) * args.epochs
warmup_steps = int(0.1 * total_steps) # 10% of steps for warmup
scheduler = get_linear_schedule_with_warmup(
@@ -113,17 +132,16 @@ def main():
best_f1 = 0.0
patience_counter = 0
epoch_number = 0
# Initialize with inf to capture best validation loss easily
best_vloss = float('inf')
for epoch in range(EPOCHS):
print(f"EPOCH {epoch_number + 1}")
for epoch in range(args.epochs):
print(f"EPOCH {epoch + 1}/{args.epochs}")
model.train(True)
total_train_loss = 0.0
for step, batch in enumerate(training_loader):
optimizer.zero_grad()
@@ -131,30 +149,33 @@ def main():
input_ids = batch["input_ids"].to(device)
attention_mask = batch["attention_mask"].to(device)
# FR8, Multitask forward pass
outputs = model(input_ids, attention_mask)
# compute total loss
loss = 0
for task in criterions.keys():
labels = batch[task].to(device)
loss += criterions[task](outputs[task], labels)
total_train_loss = loss.item()
total_train_loss += loss.item()
loss.backward()
# clip gradients to prevent exploding gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
scheduler.step()
if step % 50 == 0:
print(f" Batch {step}/{len(training_loader)} - Loss: {loss.item():.4f}")
avg_train_loss = total_train_loss / len(training_loader)
writer.add_scalar("Loss/train", avg_train_loss, epoch_number)
writer.add_scalar("Loss/train", avg_train_loss, epoch)
print(f"Average training loss: {avg_train_loss:.4f}")
# switch to evaluation mode
# -------------------- Validation loop -------------------
model.eval()
total_val_loss = 0.0
all_preds = {task: [] for task in criterions.keys()}
all_labels ={task: [] for task in criterions.keys()}
@@ -165,37 +186,48 @@ def main():
attention_mask = batch["attention_mask"].to(device)
outputs = model(input_ids, attention_mask)
v_loss = 0.0
v_loss = 0.0 # batch validation loss
for task in criterions.keys():
labels = batch[task].to(device)
v_loss += criterions[task](outputs[task], labels).item() # detatch .item(*)
preds = torch.argmax(outputs[task], dim=1).cpu().numpy()
all_preds[task].extend(preds)
all_labels[task].extend(labels.cpu().numpy())
avg_vloss = v_loss / len(validation_loader)
writer.add_scalar("Loss/val", avg_vloss, epoch_number)
print("\nValidation Metrics:")
total_val_loss += v_loss
avg_vloss = total_val_loss / len(validation_loader)
writer.add_scalar("Loss/val", avg_vloss, epoch)
# FR11, Performance evaluation
print("\nValidation Metrics (MACRO F1):")
epoch_f1 = []
for task in criterions.keys():
task_f1 = f1_score(all_labels[task], all_preds[task], average='macro')
epoch_f1.append(task_f1)
writer.add_scalar(f"F1/val_{task}", task_f1, epoch_number)
print(f" {task} Macro F1: {task_f1:.4f}")
writer.add_scalar(f"F1/val_{task}", task_f1, epoch)
print(f" {task}: {task_f1:.4f}")
avg_macro_f1 = np.mean(epoch_f1)
writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch_number)
writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch)
print(f" Average Macro F1: {avg_macro_f1:.4f}")
# NFR4, Early stopping
if avg_macro_f1 > best_f1:
best_f1 = avg_macro_f1
patience_counter = 0
torch.save(model.state_dict(), f"outputs/best_mode.pt")
print(" New best model saved.")
# Save the model with a name for the type of dataset and epoch for later analysis
model_save_path = f"outputs/best_model_{args.dataset}.pt"
torch.save(model.state_dict(), model_save_path)
print(" New best model saved to:", model_save_path)
else:
patience_counter += 1
print(f" No improvement. Patience counter: {patience_counter}/{PATIENCE}")
if patience_counter >= PATIENCE:
print(f" No improvement. Patience counter: {patience_counter}/{args.patience}")
if patience_counter >= args.patience:
print(" Early stopping triggered.")
break
writer.close()
print("Training complete.")