# TODO: Refactor, especially change expected names as I jumped the gun when
#       first making this without sampling properly
# TODO: Add button labels and finalise the categories of aspects
# TODO: Ensure there is a persistent progress tracking implementation before
#       labelling
# TODO: Finalise keybinds
# TODO: Display progress e.g. review 1020 of 5000
# TODO: Validate saving progress
# TODO: Loop instead of pressing enter
# TODO: Autosave ? / confirm quit at least
# TODO: More visual q's
"""App to classify / manually annotate reviews for ML training.

Each review is labelled in four steps, one per column of buttons:
feature request (1/0), bug report (1/0), aspect (A/S/D/F/G/H/J/K/L) and
aspect sentiment (A/S/D). Every option has a hotkey; Enter submits the
current review and ``q`` saves and quits.

Progress is stored in the ``tagged`` column of the saved CSV. It is only
resumed when the data path points at that previously saved (tagged) file;
loading the sampled file again starts from the first review.
"""
import tkinter as tk
from tkinter import ttk

import pandas as pd

DEFAULT_DATA_PATH = "data/uber_reviews_sampled.csv"
DEFAULT_SAVE_PATH = "data/uber_reviews_tagged.csv"

# Columns that must all hold a value before a review can be submitted.
LABEL_COLUMNS = ("feature_request", "bug_report", "aspect", "aspect_sentiment")

# One aspect button (and hotkey) per letter, laid out top to bottom.
ASPECT_KEYS = "ASDFGHJKL"
SENTIMENT_OPTIONS = (("A", "Positive"), ("S", "Neutral"), ("D", "Negative"))

# Sets rather than strings: `"" in "asd"` is True for substring tests, so an
# event with an empty `char` would otherwise be accepted as a hotkey.
ASPECT_HOTKEYS = frozenset(ASPECT_KEYS.lower())
SENTIMENT_HOTKEYS = frozenset(key.lower() for key, _ in SENTIMENT_OPTIONS)


class MultiTag:
    """Tkinter window for labelling reviews one at a time.

    Constructing an instance builds the window, loads the review data,
    shows the first untagged review and then blocks in the Tk main loop
    until the window is closed.
    """

    def __init__(self, data_path=DEFAULT_DATA_PATH, save_path=DEFAULT_SAVE_PATH):
        """Build the UI, load ``data_path`` and run the main loop.

        Args:
            data_path: CSV file to read reviews from.
            save_path: CSV file the labelled data is written to on every
                submit and on quit.
        """
        self.data_path = data_path
        self.save_path = save_path

        self.root = tk.Tk()
        self.active_column = 0  # column currently awaiting a label
        self.btn_width = 15  # button width
        self.number_of_aspects = len(ASPECT_KEYS)  # number of aspect buttons
        self.root.title("MultiTag")

        self._build_widgets()
        self._bind_keys()

        self.load_review_data(self.data_path)
        self.display_next_review()
        self.root.mainloop()

    @property
    def _highlight_row(self):
        """Grid row of the highlight bar: just below the last aspect button."""
        return self.number_of_aspects + 2

    def _make_button(self, text, command, row, column):
        """Create a button, place it on the grid and return the widget."""
        # grid() returns None, so the widget must be kept before gridding.
        button = ttk.Button(
            self.root, text=text, command=command, width=self.btn_width
        )
        button.grid(row=row, column=column)
        return button

    def _build_widgets(self):
        """Create the review text box, highlight bar, labels and buttons."""
        self.display_review = tk.Text(self.root, height=20, width=100, wrap="word")
        self.display_review.grid(row=0, column=0, columnspan=4, padx=10, pady=10)

        # Highlight bar shown under the column that is currently active.
        self.highlight = tk.Frame(self.root, bg="#003366", height=20, width=130)
        self.highlight.grid(row=self._highlight_row, column=0)

        headings = (
            "Feature Request ? 1 (yes), 0 (no)",
            "Bug Report ? 1 (yes), 0 (no)",
            "Aspect ? A/S/D/F/G/H/J/K/L ",
            "Aspect Sentiment ? A/S/D",
        )
        for column, text in enumerate(headings):
            ttk.Label(self.root, text=text).grid(row=1, column=column)

        self.feature_true = self._make_button(
            "1", lambda: self.feature_pressed("1"), 2, 0
        )
        self.feature_false = self._make_button(
            "0", lambda: self.feature_pressed("0"), 3, 0
        )
        self.bug_true = self._make_button("1", lambda: self.bug_pressed("1"), 2, 1)
        self.bug_false = self._make_button("0", lambda: self.bug_pressed("0"), 3, 1)

        for offset, key in enumerate(ASPECT_KEYS):
            # `k=key` binds the letter now; a plain closure would see only
            # the last letter of the loop.
            button = self._make_button(
                f"{key}: ASPECT HERE",
                lambda k=key: self.aspect_pressed(k),
                2 + offset,
                2,
            )
            setattr(self, f"aspect_{key.lower()}", button)

        self.aspect_positive = self._make_button(
            "A: Positive", lambda: self.sentiment_pressed("A"), 2, 3
        )
        self.aspect_neutral = self._make_button(
            "S: Neutral", lambda: self.sentiment_pressed("S"), 3, 3
        )
        self.aspect_negative = self._make_button(
            "D: Negative", lambda: self.sentiment_pressed("D"), 4, 3
        )

    def _bind_keys(self):
        """Register the keyboard shortcuts on the root window."""
        self.root.bind("q", self.quit_app)
        self.root.bind("<Return>", self.try_submit)
        for key in "10" + ASPECT_KEYS.lower():
            self.root.bind(key, self.handle_key)
        # Closing the window should save exactly like pressing `q`.
        self.root.protocol("WM_DELETE_WINDOW", self.quit_app)

    def handle_key(self, event):
        """Route a hotkey press to the handler for the active column."""
        key = event.char
        # Column 0 or 1: feature/bug (1 and 0)
        if key in ("1", "0"):
            if self.active_column == 0:
                self.feature_pressed(key)
            elif self.active_column == 1:
                self.bug_pressed(key)
        # Column 2: aspects (a,s,d,f,g,h,j,k,l)
        elif self.active_column == 2 and key in ASPECT_HOTKEYS:
            self.aspect_pressed(key.upper())
        # Column 3: sentiment (a,s,d)
        elif self.active_column == 3 and key in SENTIMENT_HOTKEYS:
            self.sentiment_pressed(key.upper())

    def move_highlight(self, row, col):
        """Move the highlight box directly under the button pressed."""
        self.highlight.grid(row=row, column=col)
        self.highlight.grid()  # make sure it's visible

    def _has_current_review(self):
        """Return True while ``current_review_index`` points at a real row."""
        return 0 <= self.current_review_index < len(self.review_data)

    def _record_label(self, column, value):
        """Store ``value`` for the current review; return False if none is shown.

        Without this guard, writing with ``.at`` past the last row would
        silently append a new, empty review to the data.
        """
        if not self._has_current_review():
            print("No more reviews to display.")
            return False
        self.review_data.at[self.current_review_index, column] = value
        return True

    def feature_pressed(self, value):
        """Record the feature-request label and advance to the bug column."""
        if self._record_label("feature_request", value):
            self.active_column = 1
            self.move_highlight(self._highlight_row, 1)

    def bug_pressed(self, value):
        """Record the bug-report label and advance to the aspect column."""
        if self._record_label("bug_report", value):
            self.active_column = 2
            self.move_highlight(self._highlight_row, 2)

    def aspect_pressed(self, value):
        """Record the aspect label and advance to the sentiment column."""
        if self._record_label("aspect", value):
            self.active_column = 3
            self.move_highlight(self._highlight_row, 3)

    def sentiment_pressed(self, value):
        """Record the aspect-sentiment label and reset to the first column."""
        if self._record_label("aspect_sentiment", value):
            self.active_column = 0  # Reset for next review

    @staticmethod
    def _normalise_label(value):
        """Return a stored label as a string, mapping missing values to ""."""
        if pd.isna(value):
            return ""
        # A column holding 1/0 plus blanks is read back as float (1.0/0.0).
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    def load_review_data(self, data_path):
        """Load review data from a CSV file.

        Adds the ``tagged`` column and any missing label columns. Existing
        label columns are normalised to strings so blank cells (read back
        as NaN) count as "not yet labelled" and new string labels can be
        written without dtype conflicts.
        """
        self.review_data = pd.read_csv(data_path, low_memory=False)
        if "tagged" not in self.review_data.columns:
            self.review_data["tagged"] = 0
        for column in LABEL_COLUMNS:
            if column in self.review_data.columns:
                self.review_data[column] = self.review_data[column].map(
                    self._normalise_label
                )
            else:
                self.review_data[column] = ""
        print(f"Loaded {len(self.review_data)} reviews from {data_path}")

    def display_next_review(self):
        """Display the next untagged review in the text box."""
        self.current_review_index = self.get_current_review_index()
        if not self._has_current_review():
            print("No more reviews to display.")
            return
        review = self.review_data.iloc[self.current_review_index]
        self.display_review.delete("1.0", tk.END)  # Clear the text box
        self.display_review.insert(tk.END, review["review_description"])
        self.active_column = 0  # reset to start at feature request
        self.move_highlight(self._highlight_row, 0)

    def submit_tag(self):
        """Mark the current review as tagged, save, and show the next one."""
        self.review_data.at[self.current_review_index, "tagged"] = 1
        self.save_tags(self.save_path)
        self.display_next_review()

    def try_submit(self, event=None):
        """Try to submit current review if all labels complete."""
        if self.all_labels_complete():
            self.submit_tag()
            self.move_highlight(self._highlight_row, 0)
            print("Labels submitted, loading next review")
        else:
            print("Please complete all labels before submitting")

    @staticmethod
    def _is_labelled(value):
        """Return True when ``value`` is neither missing (NaN) nor empty."""
        return not (pd.isna(value) or value == "")

    def all_labels_complete(self):
        """Return True when every label column is filled for the current review.

        Returns False when no review is currently shown.
        """
        if not self._has_current_review():
            return False
        row = self.review_data.iloc[self.current_review_index]
        return all(self._is_labelled(row[column]) for column in LABEL_COLUMNS)

    def save_tags(self, save_path):
        """Save the tagged data to a CSV file."""
        self.review_data.to_csv(save_path, index=False)
        print(f"Tagged data saved to {save_path}")

    def quit_app(self, event=None):
        """Save the data, then close the window."""
        # Save first so a failed write is reported while the app is still
        # alive; the window is closed either way.
        try:
            self.save_tags(self.save_path)
        finally:
            self.root.destroy()

    def get_current_review_index(self):
        """Return the position of the first untagged review.

        Returns the number of reviews when every review is tagged.
        """
        untagged = (self.review_data["tagged"] == 0).to_numpy()
        if untagged.any():
            return int(untagged.argmax())  # argmax gives the first True
        return len(self.review_data)  # all reviews tagged


if __name__ == "__main__":
    app = MultiTag()