From 76d9b8509bf8112e75398d10777ebdaff8572f8c Mon Sep 17 00:00:00 2001 From: charlie-rasberry Date: Fri, 20 Feb 2026 19:17:22 +0000 Subject: [PATCH] Model almost complete, need to work on loss functions soon --- src/__pycache__/dataset.cpython-313.pyc | Bin 0 -> 2184 bytes src/model.py | 31 ++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 src/__pycache__/dataset.cpython-313.pyc diff --git a/src/__pycache__/dataset.cpython-313.pyc b/src/__pycache__/dataset.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf36b23c03d222b4d345d682f24bc94b31904039 GIT binary patch literal 2184 zcmcIl&2JM&6rc63*N(CCMF_-1iR&1mZ9+uFN1&n(EvS(I(UOe?5|&2GdXsEfug%OD zD9V9eDkP{xkRnqqBwRvH$uUR%gsgnPnub$Pxm0Nb_rBS+1C5ZXN@b+|&6_uG-q*f) zvz^7_i--iXL_)9*U6d~n9{u82o)R-(2ogC`{*gy9wq2WH%)9>){z#KA{xt0cxQ{kEyQ)J^hMpE`odXY) zN0E9@`cZ2=nfaw{8}^ae2v^YWlQj$rNY>6?4t*?L%~WL&5qZ}_T!uJFT+k9}gNX_a zDbfMRBUoeseBQU)XP@nhw}T+K+}8Q{cy)Ac^x^D!^dRX1rn&PkU?Gh8|1=BSc?8Y@ zWB2%&!#PP5y6&SJf^*Hf|KM;=@YN3(4N?#3vaaai(_vT6$+{|zP6$tS4L#CK3rI!( zUFy1jD}|B?k+@w0D)PTZ*CK2-efZzC$3Cg{J)cy2{6p<}Vnod#t@ScjiQhu}n>Yz~ znQ4ko^GMk!6l`~z#~7)&rorrz%Ug-XD#XRia%qWB-eSAu3d44R5>cbP)nKqCL~+re zH+k$vWf~K!Tq2COO<4vUwqSj$Vo}CbgO)9m@%CWGRGcgVXe#LkZ6qb&GXZG<0|GJt zTExF*T(O;!`M#0x2nEH~J&T7~iI_874cvr?#Qoy3UARkL=k`~7=X#%-UPpT2l-Du*K;4R?t^?KVT(;ic zT|e0WeC*e;`hg5k_5OjaJ*X?ah19Or2hpv=D4AXu@_I&I_nh>4POjdn^_;Hv9bO+C z_4-b|?mO%Coqg3&>pQoR?0-J|Ywbttmep^e9Z}exL&pw-d zd46Sld3@#4@};%C-tbrJ+3&pJ?`!>6Ho6a1ohQ!1@uktl(WNtsXI5oz;P~n_Zy;Oi z9@|JAT8h7jznomTx_osdznoti@Q!5H$F6usCTc1D&qSm>wuK@sF~1jSvY&guM*P&n zJGY6T)4_8Do!K>t7UV95aj{gWID(F2e5+zOjh!}(r))x*W4o4H!kCC;lKldP0Qf$P zDT87&F_aRqZ&a8?F(#cWNpexa!MjQ8Fm> zs%6717?g;H!`uD(7^yJZp(EmHA)=%5I3tEjr%I#HoRe literal 0 HcmV?d00001 diff --git a/src/model.py b/src/model.py index 1969b3d..0996802 100644 --- a/src/model.py +++ b/src/model.py @@ -24,13 +24,40 @@ class Model(nn.Module): self.feature_head = nn.Linear(hidden_size, 2) self.aspect_head = 
nn.Linear(hidden_size, 6) self.aspect_sentiment_head = nn.Linear(hidden_size, 3) + + # Pass through encoder then extract the first token's representation + # Apply dropout to it, take scores for each head, return them in a dictionary def forward(self, input_ids, attention_mask): outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask) + output = outputs.last_hidden_state[:, 0, :] + output = self.dropout(output) + # Logits for each head: + bug_logits = self.bug_head(output) + feature_logits = self.feature_head(output) + aspect_logits = self.aspect_head(output) + aspect_sentiment = self.aspect_sentiment_head(output) + return { + 'bug_report': bug_logits, + 'feature_request': feature_logits, + 'aspect': aspect_logits, + 'aspect_sentiment': aspect_sentiment + } + +if __name__ == "__main__": + from dataset import ReviewDataset + from transformers import AutoTokenizer + from torch.utils.data import DataLoader + tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base") + dataset = ReviewDataset("data/processed/original_train.csv", tokenizer) + loader = DataLoader(dataset, batch_size=2) + batch = next(iter(loader)) + model = Model() + outputs = model(batch["input_ids"], batch["attention_mask"]) -tokenizer = AutoTokenizer.from_pretrained("FacebookAI/xlm-roberta-base") -model = AutoModelForMaskedLM.from_pretrained("FacebookAI/xlm-roberta-base") \ No newline at end of file + for k, v in outputs.items(): + print(k, v.shape) \ No newline at end of file