Fixed a few issues with performance data collection and debugging output. MTL training is ready; moving on to single-task training for comparison in the write-up.

This commit is contained in:
2026-02-26 17:40:37 +00:00
parent df6aec7165
commit 01e2142276

View File

@@ -1,30 +1,31 @@
# train.py # train.py
# some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html # some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html
import argparse # argparse for later switching to boosted data
import os
from datetime import datetime from datetime import datetime
import torch import torch
import random import random
from sklearn.utils.class_weight import compute_class_weight
import numpy as np import numpy as np
import pandas as pd
import torch.nn as nn import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from transformers import AutoTokenizer from transformers import AutoTokenizer
import pandas as pd
from torch.utils.tensorboard import SummaryWriter from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
from transformers import get_linear_schedule_with_warmup from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import classification_report, f1_score from sklearn.metrics import classification_report, f1_score
from sklearn.utils.class_weight import compute_class_weight
from dataset import ReviewDataset from dataset import ReviewDataset
from model import Model from model import Model
# NFR5, reproducibility
SEED = 4321 SEED = 4321
torch.manual_seed(SEED) torch.manual_seed(SEED)
np.random.seed(SEED) np.random.seed(SEED)
random.seed(SEED) random.seed(SEED)
EPOCHS = 5
PATIENCE = 3
# class weights, training loop and early stopping # class weights, training loop and early stopping
# ------------------- Class weights ------------------- # ------------------- Class weights -------------------
@@ -35,41 +36,60 @@ def compute_weights(df, column, device):
weights = compute_class_weight(class_weight='balanced', classes=classes, y=df[column]) weights = compute_class_weight(class_weight='balanced', classes=classes, y=df[column])
return torch.tensor(weights, dtype=torch.float).to(device) return torch.tensor(weights, dtype=torch.float).to(device)
# parse_args() - NFR7 and NFR9
# Example Usages: python src/train.py --dataset boosted
# python src/train.py --epochs 15 NOTE: 8 - 12 epochs has seen best results so far
def parse_args(argv=None):
    """Parse the command-line options for a training run.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``parse_args()`` callers behave exactly as before. Passing an
            explicit list makes the parser usable from tests and notebooks.

    Returns:
        argparse.Namespace with the attributes ``dataset``, ``batch_size``,
        ``epochs``, ``patience`` and ``lr``.
    """
    parser = argparse.ArgumentParser(
        description="RECLASS, Multitask learning for review classification."
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="original",
        choices=["original", "boosted"],
        help="Choose between 'original' and 'boosted' dataset.",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=16,
        help="Keep to 16 or 8 for 8GB VRAM",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=5,
        # Fixed typo in the user-facing help text ("Maxiumum").
        help="Maximum training epochs.",
    )
    parser.add_argument(
        "--patience",
        type=int,
        default=3,
        help="Patience for early stopping.",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=2e-5,
        help="Learning rate",
    )
    return parser.parse_args(argv)
def main(): def main():
args = parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Starting training...", flush=True)
print("Using device:", device) print("Using device:", device)
# Remove randomness # Remove randomness
if torch.cuda.is_available(): if torch.cuda.is_available():
print("GPU:", torch.cuda.get_device_name(0)) print("GPU:", torch.cuda.get_device_name(0))
torch.cuda.manual_seed_all(SEED) torch.cuda.manual_seed_all(SEED)
torch.cuda.manual_seed(SEED) torch.cuda.manual_seed(SEED)
print(f"Using dataset: {args.dataset.upper()}")
torch.backends.cudnn.deterministic = True torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False torch.backends.cudnn.benchmark = False
"""
Data loading:
"""
train = f"data/processed/{args.dataset}_train.csv"
val = f"data/processed/{args.dataset}_val.csv"
os.makedirs("outputs", exist_ok=True)
os.makedirs("runs", exist_ok=True)
# FR1, FR2, Multilingual tokenizer initialization
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base") tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
train = "data/processed/original_train.csv"
val = "data/processed/original_val.csv"
train_dataset = ReviewDataset(train, tokenizer) train_dataset = ReviewDataset(train, tokenizer)
val_dataset = ReviewDataset(val, tokenizer) val_dataset = ReviewDataset(val, tokenizer)
training_loader = DataLoader(train_dataset, batch_size=16, shuffle=True) training_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=16, shuffle=False) validation_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
# FR3, shared multilingual model with task-specific heads
model = Model().to(device) model = Model().to(device)
train_df = pd.read_csv(train) train_df = pd.read_csv(train)
# move input_ids, attention_mask and labels to device in each batch
# weights # Class weights
print("\n Computing class weights...")
bug_weights = compute_weights(train_df, 'bug_report', device) bug_weights = compute_weights(train_df, 'bug_report', device)
feature_weights = compute_weights(train_df, 'feature_request', device) feature_weights = compute_weights(train_df, 'feature_request', device)
aspect_weights = compute_weights(train_df, 'aspect', device) aspect_weights = compute_weights(train_df, 'aspect', device)
aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device) aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device)
# Move tensors to cpu and convert to numpy for usage with sklearn classification report # Move tensors to cpu and convert to numpy for usage with sklearn classification report
# Use detach() later for predictions # Use detach() later for predictions
print("Bug report class weights:", bug_weights.cpu().numpy()) print("Bug report class weights:", bug_weights.cpu().numpy())
@@ -77,13 +97,12 @@ def main():
print("Aspect class weights:", aspect_weights.cpu().numpy()) print("Aspect class weights:", aspect_weights.cpu().numpy())
print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy()) print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy())
# -------------------- Loss Functions -------------------
# for later # for later
# 1.0 * bug_loss + # 1.0 * bug_loss +
# 1.0 * feature_loss + # 1.0 * feature_loss +
# 0.5 * aspect_loss + # 0.5 * aspect_loss +
# 0.5 * sentiment_loss # 0.5 * sentiment_loss
# FR4-FR7: Task specific loss functions
criterions = { criterions = {
'bug_report': nn.CrossEntropyLoss(weight=bug_weights), 'bug_report': nn.CrossEntropyLoss(weight=bug_weights),
'feature_request': nn.CrossEntropyLoss(weight=feature_weights), 'feature_request': nn.CrossEntropyLoss(weight=feature_weights),
@@ -94,11 +113,11 @@ def main():
# -------------------- Optimizer and scheduler ------------------- # -------------------- Optimizer and scheduler -------------------
optimizer = torch.optim.AdamW( optimizer = torch.optim.AdamW(
model.parameters(), model.parameters(),
lr=2e-5, # change lr=args.lr, # change
weight_decay=0.01 weight_decay=0.01
) )
total_steps = len(training_loader) * EPOCHS total_steps = len(training_loader) * args.epochs
warmup_steps = int(0.1 * total_steps) # 10% of steps for warmup warmup_steps = int(0.1 * total_steps) # 10% of steps for warmup
scheduler = get_linear_schedule_with_warmup( scheduler = get_linear_schedule_with_warmup(
@@ -113,17 +132,16 @@ def main():
best_f1 = 0.0 best_f1 = 0.0
patience_counter = 0 patience_counter = 0
epoch_number = 0
# Initialize with inf to capture best validation loss easily # Initialize with inf to capture best validation loss easily
best_vloss = float('inf') best_vloss = float('inf')
for epoch in range(EPOCHS): for epoch in range(args.epochs):
print(f"EPOCH {epoch_number + 1}") print(f"EPOCH {epoch + 1}/{args.epochs}")
model.train(True) model.train(True)
total_train_loss = 0.0
for step, batch in enumerate(training_loader): for step, batch in enumerate(training_loader):
optimizer.zero_grad() optimizer.zero_grad()
@@ -131,30 +149,33 @@ def main():
input_ids = batch["input_ids"].to(device) input_ids = batch["input_ids"].to(device)
attention_mask = batch["attention_mask"].to(device) attention_mask = batch["attention_mask"].to(device)
# FR8, Multitask forward pass
outputs = model(input_ids, attention_mask) outputs = model(input_ids, attention_mask)
# compute total loss
loss = 0 loss = 0
for task in criterions.keys(): for task in criterions.keys():
labels = batch[task].to(device) labels = batch[task].to(device)
loss += criterions[task](outputs[task], labels) loss += criterions[task](outputs[task], labels)
total_train_loss = loss.item() total_train_loss += loss.item()
loss.backward() loss.backward()
# clip gradients to prevent exploding gradients # clip gradients to prevent exploding gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step() optimizer.step()
scheduler.step() scheduler.step()
if step % 50 == 0: if step % 50 == 0:
print(f" Batch {step}/{len(training_loader)} - Loss: {loss.item():.4f}") print(f" Batch {step}/{len(training_loader)} - Loss: {loss.item():.4f}")
avg_train_loss = total_train_loss / len(training_loader) avg_train_loss = total_train_loss / len(training_loader)
writer.add_scalar("Loss/train", avg_train_loss, epoch_number) writer.add_scalar("Loss/train", avg_train_loss, epoch)
print(f"Average training loss: {avg_train_loss:.4f}") print(f"Average training loss: {avg_train_loss:.4f}")
# switch to evaluation mode # -------------------- Validation loop -------------------
model.eval() model.eval()
total_val_loss = 0.0
all_preds = {task: [] for task in criterions.keys()} all_preds = {task: [] for task in criterions.keys()}
all_labels ={task: [] for task in criterions.keys()} all_labels ={task: [] for task in criterions.keys()}
@@ -165,37 +186,48 @@ def main():
attention_mask = batch["attention_mask"].to(device) attention_mask = batch["attention_mask"].to(device)
outputs = model(input_ids, attention_mask) outputs = model(input_ids, attention_mask)
v_loss = 0.0
v_loss = 0.0 # batch validation loss
for task in criterions.keys(): for task in criterions.keys():
labels = batch[task].to(device) labels = batch[task].to(device)
v_loss += criterions[task](outputs[task], labels).item() # detatch .item(*) v_loss += criterions[task](outputs[task], labels).item() # detatch .item(*)
preds = torch.argmax(outputs[task], dim=1).cpu().numpy() preds = torch.argmax(outputs[task], dim=1).cpu().numpy()
all_preds[task].extend(preds) all_preds[task].extend(preds)
all_labels[task].extend(labels.cpu().numpy()) all_labels[task].extend(labels.cpu().numpy())
avg_vloss = v_loss / len(validation_loader) total_val_loss += v_loss
writer.add_scalar("Loss/val", avg_vloss, epoch_number)
print("\nValidation Metrics:") avg_vloss = total_val_loss / len(validation_loader)
writer.add_scalar("Loss/val", avg_vloss, epoch)
# FR11, Performance evaluation
print("\nValidation Metrics (MACRO F1):")
epoch_f1 = [] epoch_f1 = []
for task in criterions.keys(): for task in criterions.keys():
task_f1 = f1_score(all_labels[task], all_preds[task], average='macro') task_f1 = f1_score(all_labels[task], all_preds[task], average='macro')
epoch_f1.append(task_f1) epoch_f1.append(task_f1)
writer.add_scalar(f"F1/val_{task}", task_f1, epoch_number) writer.add_scalar(f"F1/val_{task}", task_f1, epoch)
print(f" {task} Macro F1: {task_f1:.4f}") print(f" {task}: {task_f1:.4f}")
avg_macro_f1 = np.mean(epoch_f1) avg_macro_f1 = np.mean(epoch_f1)
writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch_number) writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch)
print(f" Average Macro F1: {avg_macro_f1:.4f}") print(f" Average Macro F1: {avg_macro_f1:.4f}")
# NFR4, Early stopping
if avg_macro_f1 > best_f1: if avg_macro_f1 > best_f1:
best_f1 = avg_macro_f1 best_f1 = avg_macro_f1
patience_counter = 0 patience_counter = 0
torch.save(model.state_dict(), f"outputs/best_mode.pt") # Save the model with a name for the type of dataset and epoch for later analysis
print(" New best model saved.") model_save_path = f"outputs/best_model_{args.dataset}.pt"
torch.save(model.state_dict(), model_save_path)
print(" New best model saved to:", model_save_path)
else: else:
patience_counter += 1 patience_counter += 1
print(f" No improvement. Patience counter: {patience_counter}/{PATIENCE}") print(f" No improvement. Patience counter: {patience_counter}/{args.patience}")
if patience_counter >= PATIENCE: if patience_counter >= args.patience:
print(" Early stopping triggered.") print(" Early stopping triggered.")
break break
writer.close() writer.close()
print("Training complete.") print("Training complete.")