Small bit of progress towards model.py, now building forward()
This commit is contained in:
@@ -18,8 +18,8 @@ class ReviewDataset(Dataset):
|
|||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
review = self.df.iloc[idx]['review']
|
review = self.df.iloc[idx]['review']
|
||||||
|
|
||||||
# encoding['input_ids']
|
# encoding['input_ids'] 1D tensor of token ids, shape [max_length]
|
||||||
# encoding['attention_mask']
|
# encoding['attention_mask'] 1D tensor of 1s 0s showing real tokens vs padding, shape [max_length]
|
||||||
# Both have shape [1, max_length] because of return_tensors='pt'
|
# Both have shape [1, max_length] because of return_tensors='pt'
|
||||||
# Squeeze them to [max_length] with .squeeze(0)
|
# Squeeze them to [max_length] with .squeeze(0)
|
||||||
encoding = self.tokenizer(
|
encoding = self.tokenizer(
|
||||||
@@ -35,6 +35,7 @@ class ReviewDataset(Dataset):
|
|||||||
|
|
||||||
# 'attention_mask': tensor of shape [max_length]
|
# 'attention_mask': tensor of shape [max_length]
|
||||||
|
|
||||||
|
# MTL structure labels as tensor scalars:
|
||||||
# 'bug_report': tensor scalar (torch.tensor(label_value))
|
# 'bug_report': tensor scalar (torch.tensor(label_value))
|
||||||
# 'feature_request': tensor scalar (torch.tensor(label_value))
|
# 'feature_request': tensor scalar (torch.tensor(label_value))
|
||||||
# 'aspect': tensor scalar (torch.tensor(label_value))
|
# 'aspect': tensor scalar (torch.tensor(label_value))
|
||||||
|
|||||||
36
src/model.py
36
src/model.py
@@ -0,0 +1,36 @@
|
|||||||
|
# model.py
|
||||||
|
# One shared encoder, four task heads (bug report, feature request, aspect, aspect sentiment)
|
||||||
|
# 12 transformer layers, 12 attention heads
|
||||||
|
|
||||||
|
from transformers import AutoTokenizer, AutoModelForMaskedLM, XLMRobertaModel
|
||||||
|
import torch.nn as nn
|
||||||
|
|
||||||
|
# Dropout is used here; it has proven to be an effective technique
|
||||||
|
# for regularization and preventing the co-adaptation of neurons as described in https://arxiv.org/abs/1207.0580
|
||||||
|
|
||||||
|
# Each nn.Linear maps the encoder's hidden representation onto the output space of its task head
|
||||||
|
# Each hidden representation is size 768
|
||||||
|
class Model(nn.Module):
    """Multi-task classifier: one pretrained encoder feeding four linear heads.

    The encoder is loaded from the "FacebookAI/xlm-roberta-base" checkpoint.
    A single dropout layer is shared by all heads, and each head is an
    ``nn.Linear`` from the encoder's hidden size to that task's output size:

    * ``bug_head``: 2 outputs
    * ``feature_head``: 2 outputs
    * ``aspect_head``: 6 outputs
    * ``aspect_sentiment_head``: 3 outputs
    """

    def __init__(self, dropout_rate=0.2):  # Try other p values
        """Load the pretrained encoder and build the dropout layer and heads.

        Args:
            dropout_rate: Probability passed to ``nn.Dropout``.
        """
        super().__init__()
        self.encoder = XLMRobertaModel.from_pretrained("FacebookAI/xlm-roberta-base")

        # Read the width from the checkpoint config instead of hard-coding it.
        hidden_size = self.encoder.config.hidden_size

        # One dropout layer, shared by every task head.
        self.dropout = nn.Dropout(dropout_rate)

        self.bug_head = nn.Linear(hidden_size, 2)
        self.feature_head = nn.Linear(hidden_size, 2)
        self.aspect_head = nn.Linear(hidden_size, 6)
        self.aspect_sentiment_head = nn.Linear(hidden_size, 3)

    def forward(self, input_ids, attention_mask):
        """Encode a batch and return the logits of every task head.

        Args:
            input_ids: Token ids, forwarded unchanged to the encoder.
                Assumed to be shaped [batch, seq_len] - TODO confirm
                against the DataLoader collation.
            attention_mask: Padding mask, forwarded unchanged to the encoder.

        Returns:
            dict mapping task name to that head's logits, one row per
            batch element. The keys "bug_report", "feature_request" and
            "aspect" mirror the label keys documented on the dataset;
            NOTE(review): "aspect_sentiment" is an assumed key name -
            confirm it against the dataset and the training loop.
        """
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # Use the hidden state of the first token as the sequence-level
        # representation, then apply the shared dropout once so all heads
        # see the same regularized features.
        pooled = self.dropout(outputs.last_hidden_state[:, 0])

        return {
            "bug_report": self.bug_head(pooled),
            "feature_request": self.feature_head(pooled),
            "aspect": self.aspect_head(pooled),
            "aspect_sentiment": self.aspect_sentiment_head(pooled),
        }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Loads the tokenizer for the same checkpoint the Model encoder uses; runs when this module is imported.
# NOTE(review): not referenced by Model in the visible code - confirm whether it is still needed here.
tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||||
|
# Loads the checkpoint through AutoModelForMaskedLM; runs when this module is imported.
# NOTE(review): Model builds its own XLMRobertaModel encoder, so this looks like leftover scratch code - confirm before relying on it.
model = AutoModelForMaskedLM.from_pretrained("FacebookAI/xlm-roberta-base")
|
||||||
Reference in New Issue
Block a user