Files
ReClass/multitag/multitag.py

204 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# TODO: Refactor, especially the expected column names - I jumped the gun when first making this without sampling properly
# TODO: Add button labels and finalise the categories of aspects
# TODO: Ensure there is a persistent progress tracking implementation before labelling
# TODO: Finalise keybinds
# TODO: Display progress e.g. review 1020 of 5000
# TODO: Validate saving progress
# TODO: Loop instead of pressing enter
# TODO: Autosave ? / confirm quit at least
# TODO: More visual q's
import tkinter as tk
from tkinter import ttk
import pandas as pd
"""
App to classify / manually annotate reviews for ML training.
Currently has hotkeys for each option: 1, 0 and a/s/d/f/g/h/j/k/l.
The load path must point to the tagged file, not the sampled one; otherwise previous progress won't be remembered.
"""
class MultiTag:
    """Tkinter app for manually labelling reviews for ML training.

    Each review is labelled column by column: feature request (1/0),
    bug report (1/0), aspect (A/S/D/F/G/H/J/K/L) and aspect sentiment
    (A/S/D). Pressing Return submits the review once all four labels are
    set, saves the whole table and shows the next untagged review.
    Pressing ``q`` saves and closes the window.

    Constructing an instance builds the window, loads the data and blocks
    in the Tk main loop until the window is closed.
    """

    # Hotkeys of the aspect column; one button is created per key.
    ASPECT_KEYS = "asdfghjkl"
    # Hotkeys of the sentiment column.
    SENTIMENT_KEYS = "asd"
    # Columns that must all be filled before a review can be submitted.
    LABEL_COLUMNS = ("feature_request", "bug_report", "aspect", "aspect_sentiment")
    # Default CSV written on submit and on quit; __init__ may override it.
    save_path = "data/uber_reviews_tagged.csv"

    def __init__(self, data_path="data/uber_reviews_sampled.csv", save_path=None):
        """Build the UI, load the reviews and run the Tk main loop.

        Args:
            data_path: CSV file to load the reviews from.
            save_path: CSV file the labelled table is written to. ``None``
                keeps the class default.
        """
        self.root = tk.Tk()
        self.root.title("MultiTag")
        if save_path is not None:
            self.save_path = save_path

        self.active_column = 0  # column that currently receives key presses
        self.btn_width = 15  # button width
        self.number_of_aspects = len(self.ASPECT_KEYS)  # number of aspect buttons

        self.display_review = tk.Text(self.root, height=20, width=100, wrap='word')
        self.display_review.grid(row=0, column=0, columnspan=4, padx=10, pady=10)

        # Highlight bar shown below the button grid, under the active column.
        self.highlight = tk.Frame(self.root, bg="#003366", height=20, width=130)
        self.highlight.grid(row=self.number_of_aspects + 2, column=0)

        # Column headings.
        headings = (
            "Feature Request ? 1 (yes), 0 (no)",
            "Bug Report ? 1 (yes), 0 (no)",
            "Aspect ? A/S/D/F/G/H/J/K/L ",
            "Aspect Sentiment ? A/S/D",
        )
        for column, heading in enumerate(headings):
            ttk.Label(self.root, text=heading).grid(row=1, column=column)

        # Buttons. The widget is created first and gridded separately:
        # grid() returns None, so chaining it would store None here.
        self.feature_true = self._make_button("1", lambda: self.feature_pressed("1"), 2, 0)
        self.feature_false = self._make_button("0", lambda: self.feature_pressed("0"), 3, 0)
        self.bug_true = self._make_button("1", lambda: self.bug_pressed("1"), 2, 1)
        self.bug_false = self._make_button("0", lambda: self.bug_pressed("0"), 3, 1)
        for offset, key in enumerate(self.ASPECT_KEYS):
            letter = key.upper()
            button = self._make_button(
                f"{letter}: ASPECT HERE",
                # Bind the letter as a default to avoid late-binding closures.
                lambda value=letter: self.aspect_pressed(value),
                2 + offset,
                2,
            )
            # Exposed as self.aspect_a, self.aspect_s, ... self.aspect_l.
            setattr(self, f"aspect_{key}", button)
        self.aspect_positive = self._make_button("A: Positive", lambda: self.sentiment_pressed("A"), 2, 3)
        self.aspect_neutral = self._make_button("S: Neutral", lambda: self.sentiment_pressed("S"), 3, 3)
        self.aspect_negative = self._make_button("D: Negative", lambda: self.sentiment_pressed("D"), 4, 3)

        # Key bindings.
        self.root.bind("q", self.quit_app)
        self.root.bind("<Return>", self.try_submit)
        for key in "10" + self.ASPECT_KEYS:
            self.root.bind(key, self.handle_key)

        self.load_review_data(data_path)
        self.display_next_review()
        self.root.mainloop()

    def _make_button(self, text, command, row, column):
        """Create a button, place it on the grid and return the widget."""
        button = ttk.Button(self.root, text=text, command=command, width=self.btn_width)
        button.grid(row=row, column=column)
        return button

    @staticmethod
    def _clean_label(value):
        """Return a stored label as a plain string ('' when it is missing).

        Labels written as "" or "1" come back from a CSV as NaN or 1.0, so
        they are normalised before being compared or overwritten.
        """
        if pd.isna(value):
            return ""
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    def _has_current_review(self):
        """Return True while ``current_review_index`` points at a real row."""
        return 0 <= self.current_review_index < len(self.review_data)

    def handle_key(self, event):
        """Route a key press to the handler of the active column."""
        key = event.char
        if len(key) != 1:
            # An empty char would pass the substring tests below.
            return
        # Column 0 or 1: feature/bug (1 and 0)
        if key in ('1', '0'):
            if self.active_column == 0:
                self.feature_pressed(key)
            elif self.active_column == 1:
                self.bug_pressed(key)
        # Column 2: aspects (a,s,d,f,g,h,j,k,l)
        elif key in self.ASPECT_KEYS and self.active_column == 2:
            self.aspect_pressed(key.upper())
        # Column 3: sentiment (a,s,d)
        elif key in self.SENTIMENT_KEYS and self.active_column == 3:
            self.sentiment_pressed(key.upper())

    def move_highlight(self, row, col):
        """Move the highlight bar to the given grid cell."""
        self.highlight.grid(row=row, column=col)

    def feature_pressed(self, value):
        """Store the feature-request label and advance to the bug column."""
        if not self._has_current_review():
            return  # writing past the last row would append a junk row
        self.review_data.at[self.current_review_index, "feature_request"] = value
        self.active_column = 1
        self.move_highlight(self.number_of_aspects + 2, 1)

    def bug_pressed(self, value):
        """Store the bug-report label and advance to the aspect column."""
        if not self._has_current_review():
            return
        self.review_data.at[self.current_review_index, "bug_report"] = value
        self.active_column = 2
        self.move_highlight(self.number_of_aspects + 2, 2)

    def aspect_pressed(self, value):
        """Store the aspect label and advance to the sentiment column."""
        if not self._has_current_review():
            return
        self.review_data.at[self.current_review_index, "aspect"] = value
        self.active_column = 3
        self.move_highlight(self.number_of_aspects + 2, 3)

    def sentiment_pressed(self, value):
        """Store the aspect-sentiment label and reset the active column.

        NOTE(review): the highlight bar is not moved here, so it stays
        under the sentiment column while key presses go to column 0 again.
        """
        if not self._has_current_review():
            return
        self.review_data.at[self.current_review_index, "aspect_sentiment"] = value
        self.active_column = 0  # Reset for next review

    def load_review_data(self, data_path):
        """Load review data from a CSV file.

        Adds the ``tagged`` column (0) and any missing label column ("").
        Label columns already present are normalised to plain strings, so
        a file saved by a previous session can be resumed.
        """
        self.review_data = pd.read_csv(data_path, low_memory=False)
        if "tagged" not in self.review_data.columns:
            self.review_data["tagged"] = 0
        for column in self.LABEL_COLUMNS:
            if column in self.review_data.columns:
                # object dtype keeps later string assignments valid.
                cleaned = self.review_data[column].map(self._clean_label)
                self.review_data[column] = cleaned.astype(object)
            else:
                self.review_data[column] = ""
        print(f"Loaded {len(self.review_data)} reviews from {data_path}")

    def display_next_review(self):
        """Show the first untagged review in the text box, if there is one."""
        self.current_review_index = self.get_current_review_index()
        if not self._has_current_review():
            print("No more reviews to display.")
            return
        review = self.review_data.iloc[self.current_review_index]
        self.display_review.delete("1.0", tk.END)  # Clear the text box
        self.display_review.insert(tk.END, review["review_description"])
        self.active_column = 0  # reset to start at feature request
        self.move_highlight(self.number_of_aspects + 2, 0)

    def submit_tag(self):
        """Mark the current review as tagged, save, and show the next one."""
        if not self._has_current_review():
            return
        self.review_data.at[self.current_review_index, "tagged"] = 1
        self.save_tags(self.save_path)
        self.display_next_review()

    def try_submit(self, event):
        """Try to submit current review if all labels complete."""
        if self.all_labels_complete():
            self.submit_tag()
            self.move_highlight(self.number_of_aspects + 2, 0)
            print("Labels submitted, loading next review")
        else:
            print("Please complete all labels before submitting")

    def all_labels_complete(self):
        """Return True when every label column of the current review is set.

        Returns False when there is no current review. Missing values (NaN)
        count as unset, the same as the empty string.
        """
        if not self._has_current_review():
            return False
        row = self.review_data.iloc[self.current_review_index]
        return all(self._clean_label(row[column]) != "" for column in self.LABEL_COLUMNS)

    def save_tags(self, save_path):
        """Save the tagged data to a CSV file."""
        self.review_data.to_csv(save_path, index=False)
        print(f"Tagged data saved to {save_path}")

    def quit_app(self, event):
        """Save the table, then close the window."""
        # Save first: if writing fails the window is still open for a retry.
        self.save_tags(self.save_path)
        self.root.destroy()

    def get_current_review_index(self):
        """Return the position of the first untagged review.

        Returns the number of rows when every review is tagged.
        """
        untagged = (self.review_data["tagged"] == 0).to_numpy()
        if untagged.any():
            return int(untagged.argmax())  # argmax gives the first True
        return len(self.review_data)  # all reviews tagged
# Launch the labelling app: constructing MultiTag builds the window and runs the Tk main loop.
app = MultiTag()