Fixed a few issues with performance data collection and debugging output. MTL training is ready; moving on to single-task training for comparison in the write-up.
This commit is contained in:
116
src/train.py
116
src/train.py
@@ -1,30 +1,31 @@
|
|||||||
# train.py
|
# train.py
|
||||||
# some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html
|
# some code directly from pytorch docs https://docs.pytorch.org/tutorials/beginner/introyt/trainingyt.html
|
||||||
|
import argparse # argparse for later switching to boosted data
|
||||||
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import torch
|
import torch
|
||||||
import random
|
import random
|
||||||
from sklearn.utils.class_weight import compute_class_weight
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
|
import torch.optim as optim
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
import pandas as pd
|
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
import torch.optim as optim
|
|
||||||
from transformers import get_linear_schedule_with_warmup
|
from transformers import get_linear_schedule_with_warmup
|
||||||
from sklearn.metrics import classification_report, f1_score
|
from sklearn.metrics import classification_report, f1_score
|
||||||
|
from sklearn.utils.class_weight import compute_class_weight
|
||||||
|
|
||||||
|
|
||||||
from dataset import ReviewDataset
|
from dataset import ReviewDataset
|
||||||
from model import Model
|
from model import Model
|
||||||
|
|
||||||
|
# NFR5, reproducibility
|
||||||
SEED = 4321
|
SEED = 4321
|
||||||
torch.manual_seed(SEED)
|
torch.manual_seed(SEED)
|
||||||
np.random.seed(SEED)
|
np.random.seed(SEED)
|
||||||
random.seed(SEED)
|
random.seed(SEED)
|
||||||
|
|
||||||
EPOCHS = 5
|
|
||||||
PATIENCE = 3
|
|
||||||
|
|
||||||
# class weights, training loop and early stopping
|
# class weights, training loop and early stopping
|
||||||
|
|
||||||
# ------------------- Class weights -------------------
|
# ------------------- Class weights -------------------
|
||||||
@@ -35,41 +36,60 @@ def compute_weights(df, column, device):
|
|||||||
weights = compute_class_weight(class_weight='balanced', classes=classes, y=df[column])
|
weights = compute_class_weight(class_weight='balanced', classes=classes, y=df[column])
|
||||||
return torch.tensor(weights, dtype=torch.float).to(device)
|
return torch.tensor(weights, dtype=torch.float).to(device)
|
||||||
|
|
||||||
|
# parse_args() - NFR7 and NFR9
|
||||||
|
# Example Usages: python src/train.py --dataset boosted
|
||||||
|
# python src/train.py --epochs 15 NOTE: 8-12 epochs have given the best results so far
|
||||||
|
def _positive_int(value):
    """argparse ``type=`` callable: parse *value* as an int and require it to be >= 1."""
    number = int(value)  # a ValueError here is reported by argparse as a usage error
    if number < 1:
        raise argparse.ArgumentTypeError(f"expected a positive integer, got {value!r}")
    return number


def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, so existing ``parse_args()`` calls
            behave as before.

    Returns:
        argparse.Namespace with the attributes ``dataset`` ("original" or
        "boosted"), ``batch_size``, ``epochs``, ``patience`` and ``lr``.

    Raises:
        SystemExit: raised by argparse on an unknown option, an invalid
            choice, or a non-positive ``--batch_size`` / ``--epochs``.
    """
    parser = argparse.ArgumentParser(
        description="RECLASS, Multitask learning for review classification."
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="original",
        choices=["original", "boosted"],
        help="Choose between 'original' and 'boosted' dataset.",
    )
    # A batch size or epoch count below 1 would leave nothing to train on,
    # so both are rejected at parse time instead of failing later.
    parser.add_argument(
        "--batch_size",
        type=_positive_int,
        default=16,
        help="Keep to 16 or 8 for 8GB VRAM",
    )
    parser.add_argument(
        "--epochs",
        type=_positive_int,
        default=5,
        help="Maximum training epochs.",
    )
    parser.add_argument(
        "--patience",
        type=int,
        default=3,
        help="Patience for early stopping.",
    )
    parser.add_argument("--lr", type=float, default=2e-5, help="Learning rate")
    return parser.parse_args(argv)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
args = parse_args()
|
||||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||||
|
print("Starting training...", flush=True)
|
||||||
print("Using device:", device)
|
print("Using device:", device)
|
||||||
# Remove randomness
|
# Remove randomness
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
print("GPU:", torch.cuda.get_device_name(0))
|
print("GPU:", torch.cuda.get_device_name(0))
|
||||||
torch.cuda.manual_seed_all(SEED)
|
torch.cuda.manual_seed_all(SEED)
|
||||||
torch.cuda.manual_seed(SEED)
|
torch.cuda.manual_seed(SEED)
|
||||||
|
print(f"Using dataset: {args.dataset.upper()}")
|
||||||
torch.backends.cudnn.deterministic = True
|
torch.backends.cudnn.deterministic = True
|
||||||
torch.backends.cudnn.benchmark = False
|
torch.backends.cudnn.benchmark = False
|
||||||
|
|
||||||
|
"""
|
||||||
|
Data loading:
|
||||||
|
|
||||||
|
"""
|
||||||
|
train = f"data/processed/{args.dataset}_train.csv"
|
||||||
|
val = f"data/processed/{args.dataset}_val.csv"
|
||||||
|
os.makedirs("outputs", exist_ok=True)
|
||||||
|
os.makedirs("runs", exist_ok=True)
|
||||||
|
|
||||||
|
# FR1, FR2, Multilingual tokenizer initialization
|
||||||
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||||
|
|
||||||
train = "data/processed/original_train.csv"
|
|
||||||
val = "data/processed/original_val.csv"
|
|
||||||
|
|
||||||
train_dataset = ReviewDataset(train, tokenizer)
|
train_dataset = ReviewDataset(train, tokenizer)
|
||||||
val_dataset = ReviewDataset(val, tokenizer)
|
val_dataset = ReviewDataset(val, tokenizer)
|
||||||
|
|
||||||
training_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
|
training_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
|
||||||
validation_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
|
validation_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
|
||||||
|
|
||||||
|
# FR3, shared multilingual model with task-specific heads
|
||||||
model = Model().to(device)
|
model = Model().to(device)
|
||||||
|
|
||||||
train_df = pd.read_csv(train)
|
train_df = pd.read_csv(train)
|
||||||
# move input_ids, attention_mask and labels to device in each batch
|
|
||||||
|
|
||||||
# weights
|
# Class weights
|
||||||
|
print("\n Computing class weights...")
|
||||||
bug_weights = compute_weights(train_df, 'bug_report', device)
|
bug_weights = compute_weights(train_df, 'bug_report', device)
|
||||||
feature_weights = compute_weights(train_df, 'feature_request', device)
|
feature_weights = compute_weights(train_df, 'feature_request', device)
|
||||||
aspect_weights = compute_weights(train_df, 'aspect', device)
|
aspect_weights = compute_weights(train_df, 'aspect', device)
|
||||||
aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device)
|
aspect_sentiment_weights = compute_weights(train_df, 'aspect_sentiment', device)
|
||||||
|
|
||||||
# Move tensors to cpu and convert to numpy for usage with sklearn classification report
|
# Move tensors to cpu and convert to numpy for usage with sklearn classification report
|
||||||
# Use detach() later for predictions
|
# Use detach() later for predictions
|
||||||
print("Bug report class weights:", bug_weights.cpu().numpy())
|
print("Bug report class weights:", bug_weights.cpu().numpy())
|
||||||
@@ -77,13 +97,12 @@ def main():
|
|||||||
print("Aspect class weights:", aspect_weights.cpu().numpy())
|
print("Aspect class weights:", aspect_weights.cpu().numpy())
|
||||||
print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy())
|
print("Aspect sentiment class weights:", aspect_sentiment_weights.cpu().numpy())
|
||||||
|
|
||||||
# -------------------- Loss Functions -------------------
|
|
||||||
# for later
|
# for later
|
||||||
# 1.0 * bug_loss +
|
# 1.0 * bug_loss +
|
||||||
# 1.0 * feature_loss +
|
# 1.0 * feature_loss +
|
||||||
# 0.5 * aspect_loss +
|
# 0.5 * aspect_loss +
|
||||||
# 0.5 * sentiment_loss
|
# 0.5 * sentiment_loss
|
||||||
|
# FR4-FR7: Task specific loss functions
|
||||||
criterions = {
|
criterions = {
|
||||||
'bug_report': nn.CrossEntropyLoss(weight=bug_weights),
|
'bug_report': nn.CrossEntropyLoss(weight=bug_weights),
|
||||||
'feature_request': nn.CrossEntropyLoss(weight=feature_weights),
|
'feature_request': nn.CrossEntropyLoss(weight=feature_weights),
|
||||||
@@ -94,11 +113,11 @@ def main():
|
|||||||
# -------------------- Optimizer and scheduler -------------------
|
# -------------------- Optimizer and scheduler -------------------
|
||||||
optimizer = torch.optim.AdamW(
|
optimizer = torch.optim.AdamW(
|
||||||
model.parameters(),
|
model.parameters(),
|
||||||
lr=2e-5, # change
|
lr=args.lr, # change
|
||||||
weight_decay=0.01
|
weight_decay=0.01
|
||||||
)
|
)
|
||||||
|
|
||||||
total_steps = len(training_loader) * EPOCHS
|
total_steps = len(training_loader) * args.epochs
|
||||||
warmup_steps = int(0.1 * total_steps) # 10% of steps for warmup
|
warmup_steps = int(0.1 * total_steps) # 10% of steps for warmup
|
||||||
|
|
||||||
scheduler = get_linear_schedule_with_warmup(
|
scheduler = get_linear_schedule_with_warmup(
|
||||||
@@ -113,17 +132,16 @@ def main():
|
|||||||
|
|
||||||
best_f1 = 0.0
|
best_f1 = 0.0
|
||||||
patience_counter = 0
|
patience_counter = 0
|
||||||
epoch_number = 0
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Initialize with inf to capture best validation loss easily
|
# Initialize with inf to capture best validation loss easily
|
||||||
best_vloss = float('inf')
|
best_vloss = float('inf')
|
||||||
|
|
||||||
for epoch in range(EPOCHS):
|
for epoch in range(args.epochs):
|
||||||
print(f"EPOCH {epoch_number + 1}")
|
print(f"EPOCH {epoch + 1}/{args.epochs}")
|
||||||
model.train(True)
|
model.train(True)
|
||||||
|
|
||||||
|
total_train_loss = 0.0
|
||||||
|
|
||||||
for step, batch in enumerate(training_loader):
|
for step, batch in enumerate(training_loader):
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
|
|
||||||
@@ -131,33 +149,36 @@ def main():
|
|||||||
input_ids = batch["input_ids"].to(device)
|
input_ids = batch["input_ids"].to(device)
|
||||||
attention_mask = batch["attention_mask"].to(device)
|
attention_mask = batch["attention_mask"].to(device)
|
||||||
|
|
||||||
|
# FR8, Multitask forward pass
|
||||||
outputs = model(input_ids, attention_mask)
|
outputs = model(input_ids, attention_mask)
|
||||||
# compute total loss
|
|
||||||
loss = 0
|
loss = 0
|
||||||
for task in criterions.keys():
|
for task in criterions.keys():
|
||||||
labels = batch[task].to(device)
|
labels = batch[task].to(device)
|
||||||
loss += criterions[task](outputs[task], labels)
|
loss += criterions[task](outputs[task], labels)
|
||||||
|
|
||||||
total_train_loss = loss.item()
|
total_train_loss += loss.item()
|
||||||
|
|
||||||
loss.backward()
|
loss.backward()
|
||||||
|
|
||||||
# clip gradients to prevent exploding gradients
|
# clip gradients to prevent exploding gradients
|
||||||
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
|
||||||
|
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
scheduler.step()
|
scheduler.step()
|
||||||
|
|
||||||
if step % 50 == 0:
|
if step % 50 == 0:
|
||||||
print(f" Batch {step}/{len(training_loader)} - Loss: {loss.item():.4f}")
|
print(f" Batch {step}/{len(training_loader)} - Loss: {loss.item():.4f}")
|
||||||
|
|
||||||
avg_train_loss = total_train_loss / len(training_loader)
|
avg_train_loss = total_train_loss / len(training_loader)
|
||||||
writer.add_scalar("Loss/train", avg_train_loss, epoch_number)
|
writer.add_scalar("Loss/train", avg_train_loss, epoch)
|
||||||
print(f"Average training loss: {avg_train_loss:.4f}")
|
print(f"Average training loss: {avg_train_loss:.4f}")
|
||||||
|
|
||||||
# switch to evaluation mode
|
# -------------------- Validation loop -------------------
|
||||||
model.eval()
|
model.eval()
|
||||||
|
total_val_loss = 0.0
|
||||||
|
|
||||||
all_preds = {task: [] for task in criterions.keys()}
|
all_preds = {task: [] for task in criterions.keys()}
|
||||||
all_labels = {task: [] for task in criterions.keys()}
|
all_labels ={task: [] for task in criterions.keys()}
|
||||||
|
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
for batch in validation_loader:
|
for batch in validation_loader:
|
||||||
@@ -165,37 +186,48 @@ def main():
|
|||||||
attention_mask = batch["attention_mask"].to(device)
|
attention_mask = batch["attention_mask"].to(device)
|
||||||
|
|
||||||
outputs = model(input_ids, attention_mask)
|
outputs = model(input_ids, attention_mask)
|
||||||
v_loss = 0.0
|
|
||||||
|
v_loss = 0.0 # batch validation loss
|
||||||
for task in criterions.keys():
|
for task in criterions.keys():
|
||||||
labels = batch[task].to(device)
|
labels = batch[task].to(device)
|
||||||
v_loss += criterions[task](outputs[task], labels).item() # detatch .item(*)
|
v_loss += criterions[task](outputs[task], labels).item() # detatch .item(*)
|
||||||
|
|
||||||
preds = torch.argmax(outputs[task], dim=1).cpu().numpy()
|
preds = torch.argmax(outputs[task], dim=1).cpu().numpy()
|
||||||
all_preds[task].extend(preds)
|
all_preds[task].extend(preds)
|
||||||
all_labels[task].extend(labels.cpu().numpy())
|
all_labels[task].extend(labels.cpu().numpy())
|
||||||
avg_vloss = v_loss / len(validation_loader)
|
total_val_loss += v_loss
|
||||||
writer.add_scalar("Loss/val", avg_vloss, epoch_number)
|
|
||||||
print("\nValidation Metrics:")
|
avg_vloss = total_val_loss / len(validation_loader)
|
||||||
|
writer.add_scalar("Loss/val", avg_vloss, epoch)
|
||||||
|
|
||||||
|
# FR11, Performance evaluation
|
||||||
|
print("\nValidation Metrics (MACRO F1):")
|
||||||
epoch_f1 = []
|
epoch_f1 = []
|
||||||
for task in criterions.keys():
|
for task in criterions.keys():
|
||||||
task_f1 = f1_score(all_labels[task], all_preds[task], average='macro')
|
task_f1 = f1_score(all_labels[task], all_preds[task], average='macro')
|
||||||
epoch_f1.append(task_f1)
|
epoch_f1.append(task_f1)
|
||||||
writer.add_scalar(f"F1/val_{task}", task_f1, epoch_number)
|
writer.add_scalar(f"F1/val_{task}", task_f1, epoch)
|
||||||
print(f" {task} Macro F1: {task_f1:.4f}")
|
print(f" {task}: {task_f1:.4f}")
|
||||||
|
|
||||||
avg_macro_f1 = np.mean(epoch_f1)
|
avg_macro_f1 = np.mean(epoch_f1)
|
||||||
writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch_number)
|
writer.add_scalar("F1/val_macro_avg", avg_macro_f1, epoch)
|
||||||
print(f" Average Macro F1: {avg_macro_f1:.4f}")
|
print(f" Average Macro F1: {avg_macro_f1:.4f}")
|
||||||
|
|
||||||
|
# NFR4, Early stopping
|
||||||
if avg_macro_f1 > best_f1:
|
if avg_macro_f1 > best_f1:
|
||||||
best_f1 = avg_macro_f1
|
best_f1 = avg_macro_f1
|
||||||
patience_counter = 0
|
patience_counter = 0
|
||||||
torch.save(model.state_dict(), f"outputs/best_mode.pt")
|
# Save the best model under a dataset-specific file name for later analysis
|
||||||
print(" New best model saved.")
|
model_save_path = f"outputs/best_model_{args.dataset}.pt"
|
||||||
|
torch.save(model.state_dict(), model_save_path)
|
||||||
|
print(" New best model saved to:", model_save_path)
|
||||||
else:
|
else:
|
||||||
patience_counter += 1
|
patience_counter += 1
|
||||||
print(f" No improvement. Patience counter: {patience_counter}/{PATIENCE}")
|
print(f" No improvement. Patience counter: {patience_counter}/{args.patience}")
|
||||||
if patience_counter >= PATIENCE:
|
if patience_counter >= args.patience:
|
||||||
print(" Early stopping triggered.")
|
print(" Early stopping triggered.")
|
||||||
break
|
break
|
||||||
|
|
||||||
writer.close()
|
writer.close()
|
||||||
print("Training complete.")
|
print("Training complete.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user