From 487be5cd274abcb8a5cf41f3f6d4b4d51e7517ca Mon Sep 17 00:00:00 2001 From: charlie-rasberry Date: Fri, 19 Dec 2025 07:14:13 +0000 Subject: [PATCH] Everything is good to go for annotations. --- .../notebooks/datasets_reviews.ipynb | 8 ++--- .../{ => notebooks}/preprocessing_uber.ipynb | 32 ++++++++++++++++--- multitag/{ => notebooks}/uber_cleaned.ipynb | 0 multitag/shell.nix | 16 ---------- multitag/src/infer.py | 0 multitag/{ => src}/multitag.py | 0 multitag/{ => src}/preprocess.py | 2 ++ multitag/{ => src}/sampler.py | 0 multitag/src/train.py | 0 9 files changed, 33 insertions(+), 25 deletions(-) rename datasets_reviews.ipynb => multitag/notebooks/datasets_reviews.ipynb (99%) rename multitag/{ => notebooks}/preprocessing_uber.ipynb (95%) rename multitag/{ => notebooks}/uber_cleaned.ipynb (100%) delete mode 100644 multitag/shell.nix create mode 100644 multitag/src/infer.py rename multitag/{ => src}/multitag.py (100%) rename multitag/{ => src}/preprocess.py (99%) rename multitag/{ => src}/sampler.py (100%) create mode 100644 multitag/src/train.py diff --git a/datasets_reviews.ipynb b/multitag/notebooks/datasets_reviews.ipynb similarity index 99% rename from datasets_reviews.ipynb rename to multitag/notebooks/datasets_reviews.ipynb index d71bf3a..3668e1a 100644 --- a/datasets_reviews.ipynb +++ b/multitag/notebooks/datasets_reviews.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "0c897ead-dfb5-4d18-bcfc-949824a0868f", "metadata": {}, "outputs": [], @@ -348,7 +348,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "reclass", "language": "python", "name": "python3" }, @@ -362,7 +362,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.0" + "version": "3.14.2" } }, "nbformat": 4, diff --git a/multitag/preprocessing_uber.ipynb b/multitag/notebooks/preprocessing_uber.ipynb similarity index 95% rename from multitag/preprocessing_uber.ipynb rename to multitag/notebooks/preprocessing_uber.ipynb index 5f5e245..95e36d7 100644 --- a/multitag/preprocessing_uber.ipynb +++ b/multitag/notebooks/preprocessing_uber.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "id": "470fe7c6-1614-4daf-879f-e6c399117c7b", "metadata": {}, "outputs": [], @@ -13,12 +13,34 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, + "id": "afe1168c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cwd: /mnt/c/Users/ch/6013/multitag/notebooks\n", + "exists data: False\n" + ] + } + ], + "source": [ + "import os\n", + "print(\"cwd:\", os.getcwd())\n", + "print(\"exists data:\", os.path.exists(\"mullitag\"))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "b855045e-2dd1-4fa1-ab5a-8ce8b50b02ee", "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv('data/uber_reviews.csv', low_memory=False)" + "\n", + "df = pd.read_csv('../data/uber_reviews.csv', low_memory=False)" ] }, { @@ -437,7 +459,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "reclass", "language": "python", "name": "python3" }, @@ -451,7 +473,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.14.0" + "version": "3.14.2" } }, "nbformat": 4, diff --git a/multitag/uber_cleaned.ipynb b/multitag/notebooks/uber_cleaned.ipynb similarity index 100% rename from multitag/uber_cleaned.ipynb rename to multitag/notebooks/uber_cleaned.ipynb diff --git a/multitag/shell.nix b/multitag/shell.nix deleted file mode 100644 index 168277a..0000000 --- a/multitag/shell.nix +++ /dev/null @@ -1,16 +0,0 @@ -{ pkgs ? import {} }: - -pkgs.mkShell { - buildInputs = with pkgs; [ - python313 - python313Packages.tkinter - python313Packages.pandas - python313Packages.numpy - ]; - - - shellHook = '' - echo "Development environment loaded" - echo "Python: $(python --version)" - ''; -} \ No newline at end of file diff --git a/multitag/src/infer.py b/multitag/src/infer.py new file mode 100644 index 0000000..e69de29 diff --git a/multitag/multitag.py b/multitag/src/multitag.py similarity index 100% rename from multitag/multitag.py rename to multitag/src/multitag.py diff --git a/multitag/preprocess.py b/multitag/src/preprocess.py similarity index 99% rename from multitag/preprocess.py rename to multitag/src/preprocess.py index dcdffe0..d80b034 100644 --- a/multitag/preprocess.py +++ b/multitag/src/preprocess.py @@ -1,3 +1,5 @@ +# preprocess.py + import pandas as pd import re from langdetect import detect, LangDetectException diff --git a/multitag/sampler.py b/multitag/src/sampler.py similarity index 100% rename from multitag/sampler.py rename to multitag/src/sampler.py diff --git a/multitag/src/train.py b/multitag/src/train.py new file mode 100644 index 0000000..e69de29