diff --git a/README.md b/README.md index a5f112e..419715c 100644 --- a/README.md +++ b/README.md @@ -24,26 +24,28 @@ RECLASS is a multi-task learning system which uses a shared BERT encoder with ta ## Repository Structure +``` +## Repository Structure ``` 6013/ -README.md -requirements.txt - multitag/ - data/ - uber_reviews.csv # Raw dataset - uber_reviews_cleaned.csv # Preprocessed reviews - uber_reviews_sampled.csv # Stratified sample for annotation - uber_reviews_tagged.csv # Annotated reviews (in progress) - notebooks/ - datasets_reviews.ipynb # Initial data exploration - preprocessing_uber.ipynb # Preprocessing analysis - uber_cleaned.ipynb # Cleaned data verification - src/ - preprocess.py # Text cleaning and filtering pipeline - sampler.py # Stratified sampling strategies - multitag.py # GUI annotation tool - train.py # Model training (in progress) - infer.py # Inference pipeline (in progress) + README.md + .gitignore + data/ + uber_reviews.csv # Raw dataset + uber_reviews_cleaned.csv # Preprocessed reviews + uber_reviews_sampled.csv # Stratified sample for annotation + uber_reviews_tagged.csv # Annotated reviews (in progress) + notebooks/ + preprocessing_uber.ipynb # Preprocessing analysis + uber_cleaned.ipynb # Cleaned data verification + src/ + preprocess.py # Text cleaning and filtering pipeline + sampler.py # Stratified sampling strategies + multitag.py # GUI annotation tool + train.py # Model training (in progress) + infer.py # Inference pipeline (in progress) + outputs/ + figures/ ``` ## Current Progress diff --git a/notebooks/preprocessing_uber.ipynb b/notebooks/preprocessing_uber.ipynb index 69838e6..ca12d76 100644 --- a/notebooks/preprocessing_uber.ipynb +++ b/notebooks/preprocessing_uber.ipynb @@ -1,8 +1,23 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "f4474e0f", + "metadata": {}, + "source": [ + "# Preprocessing Requirements\n", + "## RECLASS\n", + "\n", + "**Purpose**: Ensure samples are consistent with the original dataset and find 
issues with current sampling/preprocessing methods.\n", + "\n", + "**Dataset**: Uber Customer Reviews from Google Play (Kaggle)\n", + "\n", + "---" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 23, "id": "470fe7c6-1614-4daf-879f-e6c399117c7b", "metadata": {}, "outputs": [], @@ -13,7 +28,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "id": "afe1168c", "metadata": {}, "outputs": [ @@ -34,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 25, "id": "b855045e-2dd1-4fa1-ab5a-8ce8b50b02ee", "metadata": {}, "outputs": [], @@ -45,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 26, "id": "e7da1fb6-ede6-46c6-8fbd-fa491d3351c5", "metadata": {}, "outputs": [ @@ -200,7 +215,7 @@ "4 4.486.10002 en in " ] }, - "execution_count": 14, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -211,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 27, "id": "5c02ec54-4583-4720-88c6-1110b52c3f88", "metadata": {}, "outputs": [ @@ -235,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, "id": "1da5d625-a4ba-49f8-8314-cc9e0f4ef96a", "metadata": {}, "outputs": [ @@ -262,7 +277,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 29, "id": "1c97e396-8f05-4df7-bd0a-1bbecf6911b4", "metadata": {}, "outputs": [], @@ -272,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 30, "id": "55324c94-4944-4844-b00e-dc08c8989f7b", "metadata": {}, "outputs": [ @@ -291,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 31, "id": "c45959fe-3e23-4831-a41a-94c89892247f", "metadata": {}, "outputs": [ @@ -326,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 32, "id": "bf14e3db-a1b4-4fad-8102-b7ac25feeefa", "metadata": {}, "outputs": [ @@ -344,7 +359,7 @@ }, { 
"cell_type": "code", - "execution_count": 21, + "execution_count": 33, "id": "8ccc07fa-9913-4047-ae17-35d2454eb059", "metadata": {}, "outputs": [ @@ -357,66 +372,67 @@ "1 STAR REVIEWS:\n", "==========================================\n", "\n", - "Driver come very late than you can't do anthing pay and wait. Very bad service you can't cancil your when driver not come. Ubar charge money without service. This is very bad bad bad. So take local au...\n", - "(Length: 47.0 words)\n", + "Many times driver cancelled the ride only because he didn't wanted to go to my destination and I was supposed to pay the cancellation fees without any reason.\n", + "(Length: 28.0 words)\n", "\n", - "I have uninstalled and reinstalled the app around 5 times over the course of the past 3 days. Every time I try to use the app, I get stuck in and endless reCaptcha loop. (I enter my phone number, solv...\n", - "(Length: 66.0 words)\n", + "Their drivers are always finding new ways to outsmart customers.When Uber started initally, it was a pleasure to use their services.Now either when you book it says a few minutes n the moment you conf...\n", + "(Length: 98.0 words)\n", "\n", - "Thieves. Sent an Uber to my house in the middle of the night and wouldn't refund.\n", - "(Length: 16.0 words)\n", + "terrible GPS system. takes you the long way everywhere. seriously, Waze, google maps and pretty much every other GPS shows faster routes. please fix this.\n", + "(Length: 25.0 words)\n", "\n", "==========================================\n", "2 STAR REVIEWS:\n", "==========================================\n", "\n", - "Your app is required to much space\n", - "(Length: 7.0 words)\n", + "no helpline number, customer is unable to contect in case of emegrncy\n", + "(Length: 12.0 words)\n", "\n", - "I'm very disappointed. At first,I used Uber because it was far better than regular taxi. 
But I stopped using it because the application is very heavy and the drivers rarely reached my pinned locatio...\n", - "(Length: 107.0 words)\n", + "Ghaantaa tum threk nhi kr skte u r just lying\n", + "(Length: 10.0 words)\n", "\n", - "nowhere to leave a tip!\n", - "(Length: 5.0 words)\n", + "Nice application 😘😘\n", + "(Length: 3.0 words)\n", "\n", "==========================================\n", "3 STAR REVIEWS:\n", "==========================================\n", "\n", - "اوبر المدينة احيانا كويس .. بس لما يكون السائق باخر ملك ربي و تنتظر 14 دقيقه و بعدين يلغي و تصير دخلت بوقت الذروة المفروض يكون في تعويض .. زي لما تلغي انت .\n", - "(Length: 34.0 words)\n", + "The app is good but I got charged for a cancelation because the driver was going to make me walk a block to go to him... what's the point in the app if I have to go to them\n", + "(Length: 39.0 words)\n", "\n", - "Good application\n", - "(Length: 2.0 words)\n", + "Final amount to pay in cash doesn't always appear correct on app. You can't challenge the cost or question it. Example toll. They over charged by 60% of original cost and won't review it properly. Whe...\n", + "(Length: 59.0 words)\n", "\n", - "Toooslooow\n", - "(Length: 1.0 words)\n", + "Location of the driver's car is not updated properly . I'm using android, and the location is keep being update all the time . Please fix this problem .\n", + "(Length: 29.0 words)\n", "\n", "==========================================\n", "4 STAR REVIEWS:\n", "==========================================\n", "\n", - "Help full\n", - "(Length: 2.0 words)\n", + "Good\n", + "(Length: 1.0 words)\n", "\n", - "Won't allow me to change my payment details. 
Update: Problem solved.\n", - "(Length: 11.0 words)\n", + "I like that app 😍🙃\n", + "(Length: 5.0 words)\n", "\n", - "Very good\n", - "(Length: 2.0 words)\n", + "it is very difficult to contact the chief operator if there is any \n", + "problem...we are not clear as to whom to contact if problem with uber driver\n", + "(Length: 27.0 words)\n", "\n", "==========================================\n", "5 STAR REVIEWS:\n", "==========================================\n", "\n", - "Good driving skills\n", - "(Length: 3.0 words)\n", + "I had a great uber experience at kolkata good experience.\n", + "(Length: 10.0 words)\n", "\n", - "Lovery\n", + "Nice\n", "(Length: 1.0 words)\n", "\n", - "Excellent experience\n", - "(Length: 2.0 words)\n" + "It's an awesome aap\n", + "(Length: 4.0 words)\n" ] } ], diff --git a/notebooks/rating_distribution.ipynb b/notebooks/rating_distribution.ipynb new file mode 100644 index 0000000..326da5e --- /dev/null +++ b/notebooks/rating_distribution.ipynb @@ -0,0 +1,96 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b955467b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " review rating word_count\n", + "0 their have many problem but also best service 5 8\n", + "1 it's excellent i loved it thank you uber 5 8\n", + "2 it does the job as it should be, in a nice way! 
5 12\n", + "3 i support my family members with the help of uber 5 10\n", + "4 it's good bt it is only.for 1 man or woman 5 10\n", + "review object\n", + "rating int64\n", + "word_count int64\n", + "dtype: object\n", + " count percentage\n", + "rating \n", + "1 1325 26.51\n", + "2 195 3.90\n", + "3 235 4.70\n", + "4 390 7.80\n", + "5 2854 57.09\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "df = pd.read_csv(\"../data/uber_reviews_sampled.csv\")\n", + "print(df.head())\n", + "print(df.dtypes)\n", + "rating_counts = df[\"rating\"].value_counts().sort_index()\n", + "rating_percent = df[\"rating\"].value_counts(normalize=True).sort_index() * 100\n", + "rating_dist = pd.DataFrame({\n", + " \"count\": rating_counts,\n", + " \"percentage\": rating_percent.round(2)\n", + "})\n", + "print(rating_dist)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7d66560", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAHFCAYAAAAT5Oa6AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAN3JJREFUeJzt3QeYU9X67/F3KDPA0DtIm0OTXqVJld7bERD/CEjzTxPpiMLgUZoKKCgHgUMvHpWOdAEPvQiHIiJIl450kDLkPu+6N7mTTGECGScz6/t5ns0kOyvJyk6Y/Ga1HeBwOBwCAABgsURxXQEAAIC4RiACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAJiYObMmRIQEODakiRJItmyZZM2bdrIsWPHnvkYjhw5UpYsWRJh/6ZNm8zz6E9/OwanTp1y7atevbrZvPHzzz9LaGio2+PEhOdz6f21Pp988on4kj+/JzVr1pS33npL4rvIPkvPSz9T+phOjx49krx588qECRN89hxI2AhEgBdmzJgh27dvl/Xr10vPnj1l2bJlUrlyZbl+/bpPv3xLly5tnkd/+rMvv/zSbN4GohEjRnj9Zfgsz5WQ3pOlS5fK1q1b5f3334+T549vkiZNKsOGDZMPPvhArl27FtfVQTxAIAK8ULRoUalQoYJpqRg6dKgMHjxYLl++HOkX6PNInTq1eR796c8KFy5stth07969v+y5/Pk90aDWvHlzeeGFF+Lk+eOj1157zbQaTZkyJa6rgniAQAQ8h7Jly5qfly5dcu37888/pV+/flKyZElJkyaNpE+fXipWrGj+wg9Pf1HfvXtXZs2a5eqKc3YJRdY906FDB0mZMqUcP35cGjRoYC7nzJnTPNeDBw/cHvvcuXPy97//XVKlSiVp06aV119/XXbv3m0eU7srnmbHjh3y8ssvS7JkySR79uwyZMgQ0wXhKbIus8mTJ0uJEiVM/fT5X3zxRXn33XfNbfrcr776qrlco
0YN1+t21kkfS0Pnjz/+KJUqVZIUKVLIm2++GeVzqSdPnshHH30kuXLlMvXV92TDhg1uZfTY5cmT56ndLN6+J0pbCfX91brq661du7ZpSYrseQ4fPmy+pPVzkSVLFvPabt68+dT3Y9++fbJr1y5p165dhLDYv39/CQkJMa9dP2v6+hcsWOAqs2fPHtO1q68/efLk5qfW4fTp05F2Y/3www/SpUsXyZAhgwl/b7zxhjkmFy9elFatWpnPk3YX6/OG/0w4uzDHjh371PcjKtryqt2C+rx6PPUzGNl9V65caf5/BQUFmdceVbdpYGCgtG7dWr766ivhPOZ4miRPLQEgSidPnjQ/CxQo4Nqn4eSPP/4wXxj61/zDhw/NL/oWLVqYLjf9glH6pfnKK6+YYODsBnla64N+ATVp0kQ6depkgpAGh3/84x/mC1a7B5R+eeljah3GjBkj+fLlk9WrV5svhph2aemXkn5x6pekfjFpV9X8+fOfet+FCxdK9+7dpVevXuZLKlGiRCbA6WOqhg0bmpYODUhffPGFq/tJx3o4XbhwQf7nf/5HBg4caMrqY0Rn0qRJkjt3bjNWRMORfiHXr19fNm/ebIKKN7x9T/SYaNisU6eOCSH63uvza4jSL3LtTg2vZcuW5n3Q9+/gwYMmaKp//etf0dZrxYoVkjhxYqlatarb/r59+8qcOXPkww8/lFKlSpn3/tChQ25dRBpUChYsaEKRBiY9vhpaX3rpJfO+ZMyY0e0xO3fubD6r+l5qENP36vHjx3L06FGzv2vXrubzrJ8tDctaB1+8H3PnzjX/N5o2bWoCqXZ5actO3bp1Zc2aNeYzqfS4ahl9LK1jWFiYeY7wf5SEp++Fvl49LsWKFYv2OMNyDgBPNWPGDIf+d9mxY4fj0aNHjtu3bztWr17tyJo1q6Nq1apmX1QeP35sbu/UqZOjVKlSbrcFBwc72rdvH+E+GzduNM+nP520nO7797//7Va2QYMGjoIFC7quf/HFF6bcqlWr3Mp169bN7NfXEp3WrVs7kidP7rh48aLba3jxxRfN/U+ePOnaX61aNbM59ezZ05E2bdpoH/+bb76J8NrCP57etmHDhkhvC/9cWg8tmz17dsf9+/dd+2/duuVInz69o1atWm7HLnfu3BEec/jw4eYxnuU9CQsLM89drFgxc9lJPxuZM2d2VKpUKcLzjB071u0xu3fv7kiWLJnjyZMnjujUr1/fHH9PRYsWdTRr1szhDX0v79y5Y17nZ599FuEz3qtXL7fy+vi6f9y4cW77S5Ys6ShduvQzvR/O53J+lu7evWvKNG7c2O059LiWKFHCUa5cOde+8uXLR/kckX2lHTt2zOyfPHmyV8cJ9qHLDPCCjiHRv1y1a6RevXqSLl060xWms87C++abb0xzv3Yb6W16n+nTp8uRI0ee63hrl0Tjxo3d9hUvXtyt+0P/EnfWLzztJomJjRs3mr/GtUvHSVsnYtLCVK5cOblx44Z5Lj0uV69eFW/pMdVWmpjSVgvtmnHS167HSFvPtPUgtmiLyfnz5003VvhWLH3PtSVIux2d45+ctHXP873TLlYdhxYdfZ7MmTNHerxXrVplxrJpV979+/cjlLlz544MGjTItBTqZ1E3raO2JkX2eWzUqJHb9UKFCrla9zz3e3a7Pev7sW3bNtOi2b59e9Ma5dy0hUk/x9rdq/XVTS9H9RyRcR6333//PdLbAScCEeCF2bNnm1/IOs6iW7du5gvFM2gsWrTIjLXQ7jLtBtBuGL2PjhfRL7/nod1X4b8IlI6jCP+42l0SPsw4RbYvMnr/rFmzRtgf2T5PGg60+0e/KDUU6JdR+fLlZd26dRJTOj7FG1HVVbsqNQzEFme3VGT11a4k/TL3nH2o43I83zsVWZAJT2/3fN/V559/bsKODurXbj7tEmvWrJnbUhBt27Y13VjaFaZdTzoWST+PmTJlivR59TE8x+FEtT+yz/OzvB/O7i4d96Z/PITft
GtOx/9oYNLjqcfVm8+n87g97RgDjCECvKB/FTsHUusXkP7FO23aNPn222/NL3OlIUgHen799dduA3Y9Bz7HFv3S1S89TzooNqb3j6xsTO/fsWNHs+lf89oqMHz4cNPq8Ouvv5qxJU8T/pjFRFR11S9sbQlxfilGdvyfpQXLM9zomJzIWnS01Uhbu3xBx/loIPAUHBxsljDQTUOFs7VIW0t++eUXM2Bbxx/pe6D7Pce5xYaYvB+enOOYJk6caFphI6OBXsfQ6efDm8+n83V6jpUCPNFCBDwHHcypX3o6oFn/clX6C1t/+Yf/Ytdf1p6zzJwtBL7+y7VatWpy+/Zt8+UYng5AjQkNejpwNfwgVQ1+GvC8oV/WOphWlyfQ1gGdYeVNq0hMaYtc+JYKfe3Lly+XKlWqmK4+pQPEtVsq/GvSOmmLybO+JzpQWVsBdWB1+BlMGgS/++4718wzX9CZeidOnIi2jAYGnU2nLZbanafddfoZ1Lo5j7mThvjY6k6MyfvhSbuXdfaaDvLWPzgi2/T/lH6mtJswqueIjPO4xeWSDYgfaCECnoOGIZ0ppDOi9ItRZ0dpa4j+wtbZVtpqdPbsWTMTTLtWPFe11lkvOvZDf5nr7ToWQr9on4eOwxg/frypi84+0rEjGo6cX/5Pm7X13nvvmankOo5Hg55+qeuMMP2ifxqdrq1Tu/ULTl+PBsFRo0aZWXA6q0nptHqlU6H19WrrjbaoeXYnxZR+yepUd53tpKFUu1hu3bplWk2cdPyTvhadaTVgwADzZardTZGFgpi+J3ocNRDrLDN9z7ULVVtePv74YzOOavTo0eIrOlNKuyK1lS38jEbtjtTn1rFI+lnULlyddRY+jOnMNK2TtpBoMNQxZjqeTQNIbIjJ++FJW460dUg/u9qio/9vtLv1ypUr8t///tf81JliSv8v6bgifQ6daanvoT6HhqXIWr10LFdkM/SACOJ6VDcQHzhnxezevTvCbTrbJVeuXI78+fObGTxq9OjRjjx58jiCgoIchQoVckydOjXSGU379+93vPzyy44UKVKY25yzqKKaZaYzgzxF9rhnzpxxtGjRwpEyZUpHqlSpHC1btnR8//33ptzSpUuf+nq3bt3qqFChgqm/zqQbMGCA46uvvnrqLLNZs2Y5atSo4ciSJYsjMDDQzAZq1aqV48CBA26PP2HCBEdISIgjceLEbjPf9LGKFCkSaZ2immU2ZswYx4gRIxw5cuQwz6kz+dasWRPh/vr6dWaUzqD729/+5pg0adJzvydqyZIlZuaTzhbT96dmzZrm+IXnfJ4rV6647fecbRWVmzdvmvfSc5ba4MGDHWXLlnWkS5fOvFf6ut555x3H1atXXWXOnTtn3n8to5+FevXqOQ4dOmRm3YWfTRfVZzyqunt+Hr15P6J63Zs3b3Y0bNjQzBhLmjSp44UXXjDXdWZieMuWLXMUL17cPL7+39P/b5G9l6pKlSoRZq8BkQnQfyLGJAAJja7po60/Z86ckRw5csR1deAlXdtJuzK169HbcVZ/BV3vSFv6tDVK1+DyB7/99pvkz5/ftI5qixIQHbrMgARIZxU5x57oQFSdFaddRNqNRhiKnzTM6ixHHZ/kHMCP6GmXsS4hQRhCTBCIgARIx4/oOCL9q13HtehpFHR6tn6pIn7SQdPz5s175hMJ20bXMdIV0J2rgQNPQ5cZAACwHtPuAQCA9QhEAADAegQiAABgPQZVx5AuMKbL8esibf445RUAAESkqwvpauZ6jsHoFqYlEMWQhqGcOXPGtDgAAPAjetaA6JYdIRDFkLYMOQ9o6tSpffPuAACAWKWnjtEGDef3eFQIRDHk7CbTMEQgAgAgfnnacBcGVQMAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsl8T6IwAAQDTyDF7J8
fkLnBrdUOISLUQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1ovTQDRq1Ch56aWXJFWqVJI5c2Zp1qyZHD161K1Mhw4dJCAgwG2rUKGCW5kHDx5Ir169JGPGjBIcHCxNmjSRc+fOuZW5fv26tGvXTtKkSWM2vXzjxo2/5HUCAAD/FqeBaPPmzdKjRw/ZsWOHrFu3Th4/fix16tSRu3fvupWrV6+eXLhwwbV9//33brf36dNHFi9eLAsXLpQtW7bInTt3pFGjRhIWFuYq07ZtW9m/f7+sXr3abHpZQxEAAECSuDwEGkzCmzFjhmkp2rt3r1StWtW1PygoSLJmzRrpY9y8eVOmT58uc+bMkVq1apl9c+fOlZw5c8r69eulbt26cuTIEfNcGrzKly9vykydOlUqVqxoWqQKFiwYq68TAAD4N78aQ6ThRqVPn95t/6ZNm0xQKlCggHTp0kUuX77suk3D06NHj0zLklP27NmlaNGism3bNnN9+/btppvMGYaUdrvpPmcZT9oNd+vWLbcNAAAkTH4TiBwOh/Tt21cqV65swoxT/fr1Zd68efLDDz/Ip59+Krt375ZXXnnFBBZ18eJFCQwMlHTp0rk9XpYsWcxtzjIaqDzpPmeZyMY3Occb6aYtTgAAIGGK0y6z8Hr27CkHDhwwY4DCa926teuyBqWyZctK7ty5ZeXKldKiRYtoA5YOwHYKfzmqMuENGTLEBDQnbSEiFAEAkDD5RQuRzhBbtmyZbNy4UXLkyBFt2WzZsplAdOzYMXNdxxY9fPjQzCILT7vVtJXIWebSpUsRHuvKlSuuMp503FLq1KndNgAAkDDFaSDSFhptGVq0aJHpEgsJCXnqfa5duyZnz541wUiVKVNGkiZNamapOelMtEOHDkmlSpXMdR08reOTdu3a5Sqzc+dOs89ZBgAA2CtOu8x0yv38+fNl6dKlZi0i53geHbOTPHlyM30+NDRUWrZsaQLQqVOn5N133zXrDTVv3txVtlOnTtKvXz/JkCGDGZDdv39/KVasmGvWWaFChczUfR2QPWXKFLOva9euZmo+M8wAAECcBqLJkyebn9WrV48w/V4XZEycOLEcPHhQZs+ebRZR1FBUo0YN+frrr02Acho/frwkSZJEWrVqJffv35eaNWvKzJkzzf2ddGB27969XbPRdPHGSZMm/WWvFQAA+K8Ah/Zb4al0ULW2Rmk3G+OJAMAeeQavjOsqWOHU6IZx+v3tF4OqAQAA4hKBCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArBengWjUqFHy0ksvSapUqSRz5szSrFkzOXr0qFsZh8MhoaGhkj17dkmePLlUr15dDh8+7FbmwYMH0qtXL8mYMaMEBwdLkyZN5Ny5c
25lrl+/Lu3atZM0adKYTS/fuHHjL3mdAADAv8VpINq8ebP06NFDduzYIevWrZPHjx9LnTp15O7du64yY8eOlXHjxsmkSZNk9+7dkjVrVqldu7bcvn3bVaZPnz6yePFiWbhwoWzZskXu3LkjjRo1krCwMFeZtm3byv79+2X16tVm08saigAAAAIc2gTjhfv375tWmxQpUpjrp0+fNmGkcOHCJsw8jytXrpiWIg1KVatWNc+jLUMaeAYNGuRqDcqSJYuMGTNGunXrJjdv3pRMmTLJnDlzpHXr1qbM+fPnJWfOnPL9999L3bp15ciRI6Z+GrzKly9vyujlihUryi+//CIFCxZ8at1u3bplWpb0+VKnTv1crxMAEH/kGbwyrqtghVOjG8bK48b0+9vrFqKmTZvK7NmzzWXtctKA8emnn5r9kydPfq5Ka2VV+vTpzc+TJ0/KxYsX3YJWUFCQVKtWTbZt22au7927Vx49euRWRkNU0aJFXWW2b99uDoYzDKkKFSqYfc4ynjR46UEMvwEAgITJ60D0008/SZUqVczlb7/91rTWaCuRhqTPP//8mSuirUF9+/aVypUrmzCjNAwpfY7w9LrzNv0ZGBgo6dKli7aMtjx50n3OMpGNb3KON9JNW5wAAEDC5HUgunfvnhkErdauXSstWrSQRIkSmRYXDUbPqmfPnnLgwAFZsGBBhNsCAgIihCfPfZ48y0RWPrrHGTJkiGmxcm5nz5714tUAAIAEHYjy5csnS5YsMQFhzZo1rq6qy5cvP/PYGp0htmzZMtm4caPkyJHDtV8HUCvPVhx9LmerkZZ5+PChmUUWXZlLly5FOmbJs/UpfNecvp7wGwAASJi8DkTDhg2T/v37S548ecyYHB2Y7GwtKlWqlFePpS002jK0aNEi+eGHHyQkJMTtdr2uYUZnoDlp+NFB15UqVTLXy5QpI0mTJnUrc+HCBTl06JCrjNZRW3l27drlKrNz506zz1kGAADYK4m3d/j73/9uxvlo6ChRooRrf82aNaV58+ZePZZOuZ8/f74sXbrUdMM5W4J0zI6uOaTdWTrDbOTIkZI/f36z6WWd4abT6J1lO3XqJP369ZMMGTKYAdka2IoVKya1atUyZQoVKiT16tWTLl26yJQpU8y+rl27mqn5MZlhBgAAEjavA5G2xGggcnZnOZUrV87rJ3fOStPFFsObMWOGdOjQwVweOHCgmerfvXt30y2mrVLaGuUcx6TGjx8vSZIkkVatWpmyGs5mzpwpiRMndpWZN2+e9O7d29XFp4s36tpGAAAAXq9DpGNpdEq6dlXp9HcNMy+//LKkTJkyQR9N1iECADuxDtFfI96tQ6StNJs2bTItLPv27ZNXX33VdFPpLLPBgwc/b70BAAD8v4XIkw5e/uSTT0yX1JMnT9xOl5GQ0EIEAHaihciOFiKvxxDpaTB0lpe2EulPDUA6pkhXq9YuNAAAgPjG60BUpEgRc+4wnf31/vvvm+sAAADxmddjiHSm1gsvvCChoaHy5ptvmpOurlq1ypxhHgAAwIpANGHCBHM+M135+b333jNdZrpYY8aMGc3AagAAgAQfiJx0APXjx4/NytE6DV/POH/q1Cnf1g4AAMAfA9Hbb79tVqjWM8V369ZNzp8/b1Z9/u9//xvlmeMBAAAS1KDq33//3ZwCQxdkLFq0aOzUCgAAwJ8D0bfffhs7NQEAAIhPY4jmzJljTteRPXt2OX36tGuwtZ6kFQAAIMEHIj0ha9++faVBgwZy48YN18rUadOmNaEIAAAgwQeiiRMnytSpU2Xo0KFuZ5MvW7asHDx40Nf1AwAA8L9AdPLkSSlVqlSE/UFBQXL37l1f1QsAAMB/A1FISIjs378/wn5drbpw4cK+qhcAAID/zjIbMGCA9OjRQ/78809xOByya9cuWbBggYwaNUqmTZsWO7UEAADwp0DUsWNHs0L1wIED5d69e9K2bVtzbrPPPvtM2rRpEzu1BAAA8KdApHRhR
t2uXr1qTuGhq1YDAABYFYic9ISuAAAAVgSi0qVLy4YNGyRdunRmhllAQECUZX/66Sdf1g8AAMA/AlHTpk3NtHrn5egCEQAAQIIMRMOHD3ddDg0Njc36AAAA+P86RDrLTLvPdMo9AACAlYHo2rVr0rBhQ8mRI4f069cv0kUaAQAAEnQgWrZsmVy8eNF0o+3du1fKlCljVqgeOXKknDp1KnZqCQAA4E+ByHlm+65du8qmTZvk9OnTphttzpw5ki9fPt/XEAAAwB8DkdOjR49kz549snPnTtM6lCVLFt/VDAAAwJ8D0caNG81K1RqA2rdvL6lSpZLly5fL2bNnfV9DAAAAf1upWgdT68DqunXrypQpU6Rx48aSLFmy2KkdAACAPwaiYcOGyauvvmpWrQYAALCyy0wHU2sYOn78uKxZs0bu379v9rMuEQAAsGodopo1a0qBAgWkQYMGcuHCBbO/c+fOZl0iAACABB+I3nnnHUmaNKmcOXNGUqRI4drfunVrWb16ta/rBwAA4H9jiNauXWu6ynRwdXj58+c3axIBAAAk+Baiu3fvurUMOV29elWCgoJ8VS8AAAD/DURVq1aV2bNnu64HBATIkydP5OOPP5YaNWr4un4AAAD+12Wmwad69epmheqHDx/KwIED5fDhw/LHH3/I1q1bY6eWAAAA/tRCpCdyPXDggJQrV05q165tutBatGgh+/btk7x588ZOLQEAAPylhUjPXVanTh2zQvWIESNir1YAAAD+2kKk0+0PHTpkxg0BAABY22X2xhtvyPTp02OnNgAAAPFhULUOpJ42bZqsW7dOypYtK8HBwW63jxs3zpf1AwAA8L9ApF1mpUuXNpd//fVXt9voSgMAAFYEoo0bN8ZOTQAAAOLLGCIAAICEhkAEAACsRyACAADWIxABAADrxSgQ6ayy69evm8sffPCB3Lt3z/oDBwAALAtER44cMecsU3rKjjt37sR2vQAAAPxr2n3JkiWlY8eOUrlyZXE4HPLJJ59IypQpIy07bNiwGD/5jz/+KB9//LHs3btXLly4IIsXL5ZmzZq5bu/QoYPMmjXL7T7ly5eXHTt2uK4/ePBA+vfvLwsWLJD79+9LzZo15csvv5QcOXK4ymjrVu/evWXZsmXmepMmTWTixImSNm3aGNcVAABYHohmzpwpw4cPlxUrVpjFF1etWiVJkkS8q97mTSDSVqcSJUqYsNWyZctIy9SrV09mzJjhuh4YGOh2e58+fWT58uWycOFCyZAhg/Tr108aNWpkQlbixIlNmbZt28q5c+dk9erV5nrXrl2lXbt25n4AAAAxCkQFCxY0gUMlSpRINmzYIJkzZ37uo1e/fn2zRScoKEiyZs0a6W03b94051WbM2eO1KpVy+ybO3eu5MyZU9avXy9169Y13X0ahLRVSVuX1NSpU6VixYpy9OhR89oAAIDdvJ5l9uTJE5+EoZjatGmTeb4CBQpIly5d5PLly67btBXo0aNHUqdOHde+7NmzS9GiRWXbtm3m+vbt2yVNmjSuMKQqVKhg9jnLAAAAu3l96g7122+/yYQJE0zri3aTFSpUSN5++23JmzevTyunrUevvvqq5M6dW06ePCnvv/++vPLKKyYIacvRxYsXTRdaunTp3O6XJUsWc5vSn5EFON3nLBMZHZukm9OtW7d8+toAAEA8biFas2aNFC5cWHbt2iXFixc3rTE7d+6UIkWKyLp163xaudatW0vDhg3NczRu3NiMXdITyq5cuTLa++nA7/Anmo3spLOeZTyNGjXKtCI5N+2GAwAACZPXLUSDBw+Wd955R0aPHh1h/6BBg6R27doSW7Jly2Zai44dO2au69iihw8fmllk4VuJtFutUqVKrjKXLl2K8FhXrlwxLUlRGTJkiPTt29ethYhQBABAwuR1C5F2k3Xq1CnC/jfffFN+/vlniU3Xrl2Ts2fPmmCkypQpI0mTJnVrmdLp+4cOHXIFIh08rYOvtUXLSVu0dJ+zT
GS0Sy516tRuGwAASJi8biHKlCmT7N+/X/Lnz++2X/d5O9haF3g8fvy467qOE9LHSZ8+vdlCQ0PNdHwNQKdOnZJ3331XMmbMKM2bNzfltStLw5lOtdcp93ofXZOoWLFirllnOr5Jp+7rgOwpU6a4pt3r1HxmmAEAgGcKRBosNFCcOHHCtLDoOJwtW7bImDFjTDDxxp49e6RGjRqu684uqvbt28vkyZPl4MGDMnv2bLlx44YJRVr266+/llSpUrnuM378eLMmUqtWrVwLM+q6Sc41iNS8efPMwozO2Wi6MOOkSZP4BAAAACPAoaOLvaDFdYbZp59+KufPn3dNdR8wYIAJHdENVI7PdAyRtkhpVxvdZwBgjzyDo5/IA984NbqhxOX3t9ctRBp4dFC1brdv3zb7wrfYAAAAWLEOkRNBCAAAWDnLDAAAIKEhEAEAAOsRiAAAgPW8CkR6IlWd+q6nzwAAALAyEOmq0LoKdEKdWg8AAOzkdZfZG2+8IdOnT4+d2gAAAMSHafd6MtVp06aZ84eVLVtWgoOD3W4fN26cL+sHAADgf4FIu8xKly5tLnuOJaIrDQAAWBGINm7cGDs1AQAAiG/T7vUs9WvWrDEnVFVenhINAAAg/gaia9eumTPKFyhQQBo0aCAXLlww+zt37uz12e4BAADiZSDSk7rq9PszZ85IihQpXPtbt24tq1ev9nX9AAAA/G8M0dq1a01XWY4cOdz258+fX06fPu3LugEAAPhnILp7965by5DT1atXJSgoyFf1skqewSvjugrWODW6YVxXAQCQELrMqlatKrNnz3abav/kyRP5+OOPzWk9AAAAEnwLkQaf6tWry549e8wijQMHDpTDhw/LH3/8IVu3bo2dWgIAAPhTC1HhwoXlwIEDUq5cOaldu7bpQmvRooXs27dP8ubNGzu1BAAA8KcWIpU1a1YZMWKE72sDAAAQXwLR9evXzQlejxw5YsYQFSpUSDp27Cjp06f3fQ0BAAD8rcts8+bNEhISIp9//rkJRjp2SC/rPr0NAAAgwbcQ9ejRQ1q1aiWTJ0+WxIkTm31hYWHSvXt3c5ue/BUAACBBtxD99ttv5hQdzjCk9HLfvn3NbQAAAAk+EJUuXdqMHfKk+0qWLOmregEAAPhXl5lOs3fq3bu3vP322+Zs9xUqVDD7duzYIV988YWMHj069moKAAAQl4FIW350NpnD4XDt0wUZPbVt29ac5BUAACDBBaKTJ0/Gfk0AAAD8ORDlzp079msCAAAQnxZm/P333815yy5fvmxO7BqejjECAABI0IFoxowZ8tZbb0lgYKBkyJDBjC1y0ssEIgAAkOAD0bBhw8w2ZMgQSZTI61n7AAAAfsfrRHPv3j1p06YNYQgAANgbiDp16iTffPNN7NQGAAAgPnSZjRo1Sho1aiSrV6+WYsWKSdKkSd1uHzdunC/rBwAA4H+BaOTIkbJmzRopWLCgue45qBoAACDBByJtAfrXv/4lHTp0iJ0aAQAA+PsYoqCgIHn55ZdjpzYAAADxIRDpiV0nTpwYO7UBAACID11mu3btkh9++EFWrFghRYoUiTCoetGiRb6sHwAAgP8ForRp00qLFi1ipzYAAADx5dQdAAAACQnn3gAAANbzuoUoJCQk2vWGTpw4Yf1BBQAACTwQ9enTx+36o0ePZN++fWbl6gEDBviybgAAAP4ZiHTafWS++OIL2bNnjy/qBAAAED/HENWvX1++++47Xz0cAABA/AtE3377raRPn95XDwcAAOC/XWalSpVyG1TtcDjk4sWLcuXKFfnyyy99XT8AAAD/C0TNmjVzu54oUSLJlCmTVK9eXV588UVf1g0AAMA/A9Hw4cNjpyYAAAA2Lsz4448/SuPGjSV79uymG27JkiVut2t3XGhoqLk9efLkphXq8OHDbmUePHggvXr1kowZM0pwcLA0adJEzp0751bm+vXr0q5dO0mTJo3Z9PKNGzf+ktcIAAASUCDSrrHEiRNHu
yVJ4l2D0927d6VEiRIyadKkSG8fO3asjBs3zty+e/duyZo1q9SuXVtu377tti7S4sWLZeHChbJlyxa5c+eONGrUSMLCwlxl2rZtK/v37zdrJemmlzUUAQAAqBgnGA0dUdm2bZtMnDjRtOh4O1Vft8joY02YMEGGDh3qOpnsrFmzJEuWLDJ//nzp1q2b3Lx5U6ZPny5z5syRWrVqmTJz586VnDlzyvr166Vu3bpy5MgRE4J27Ngh5cuXN2WmTp0qFStWlKNHj0rBggX5JAAAYLkYB6KmTZtG2PfLL7/IkCFDZPny5fL666/LP/7xD59V7OTJk2b2Wp06dVz7goKCpFq1aiaAaSDau3evWSk7fBntXitatKgpo4Fo+/btppvMGYZUhQoVzD4tE1Ug0q443Zxu3brls9cGAAASwBii8+fPS5cuXaR48eLy+PFj0wWlrTe5cuXyWcU0DCltEQpPrztv05+BgYGSLl26aMtkzpw5wuPrPmeZyIwaNco15kg3bXUCAAAJk1eBSLuoBg0aJPny5TODmzds2GBah7RFJrZ4nkhWu9KiO7lsZGUiK/+0x9GWL329zu3s2bPPVH8AAJCAApEOcP7b3/4mK1askAULFpjupipVqsRaxXQAtfJsxbl8+bKr1UjLPHz40Mwii67MpUuXIjy+LiTp2foUnnbPpU6d2m0DAACWjyEaPHiwmfqurUPaPaZbZBYtWuSTioWEhJgws27dOrM6ttLws3nzZhkzZoy5XqZMGUmaNKkp06pVK7PvwoULcujQIRPglA6e1haeXbt2Sbly5cy+nTt3mn2VKlXySV0BAIAlgeiNN954aleVt3SK/PHjx90GUut4JD0nmo5H0in1I0eOlPz585tNL6dIkcJMo1c6tqdTp07Sr18/yZAhg7lf//79pVixYq5ZZ4UKFZJ69eqZMU9Tpkwx+7p27Wqm5jPDDAAAeBWIZs6c6fMjtmfPHqlRo4bret++fc3P9u3bm+cbOHCg3L9/X7p37266xXSm2Nq1ayVVqlSu+4wfP96sf6QtRFq2Zs2a5r66LpLTvHnzpHfv3q7ZaLp4Y1RrHwEAAPsEOLxdPMhSOu1eW6S0q83X44nyDF7p08dD1E6NbsjhAeAVfkfH79/PMf3+jtNTdwAAAPgDAhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFjPrwNRaGioBAQEuG1Zs2Z13e5wOEyZ7NmzS/LkyaV69epy+PBht8d48OCB9OrVSzJmzCjBwcHSpEkTOXfuXBy8GgAA4K/8OhCpIkWKyIULF1zbwYMHXbeNHTtWxo0bJ5MmTZLdu3ebsFS7dm25ffu2q0yfPn1k8eLFsnDhQtmyZYvcuXNHGjVqJGFhYXH0igAAgL9JIn4uSZIkbq1C4VuHJkyYIEOHDpUWLVqYfbNmzZIsWbLI/PnzpVu3bnLz5k2ZPn26zJkzR2rVqmXKzJ07V3LmzCnr16+XunXr/uWvBwAA+B+/byE6duyY6RILCQmRNm3ayIkTJ8z+kydPysWLF6VOnTquskFBQVKtWjXZtm2bub5371559OiRWxl9rKJFi7rKREW72m7duuW2AQCAhMmvA1H58uVl9uzZsmbNGpk6daoJQJUqVZJr166Zy0pbhMLT6
87b9GdgYKCkS5cuyjJRGTVqlKRJk8a1aasSAABImPw6ENWvX19atmwpxYoVM11eK1eudHWNOelAa8+uNM99nmJSZsiQIabLzbmdPXv2uV4LAADwX34diDzpLDENR9qN5hxX5NnSc/nyZVerkZZ5+PChXL9+PcoyUdHut9SpU7ttAAAgYYpXgUjH9Rw5ckSyZctmxhRp4Fm3bp3rdg0/mzdvNt1qqkyZMpI0aVK3MjpT7dChQ64yAAAAfj3LrH///tK4cWPJlSuXadX58MMPzeDm9u3bmy4vnVI/cuRIyZ8/v9n0cooUKaRt27bm/jr2p1OnTtKvXz/JkCGDpE+f3jymswsOAADA7wORLqD42muvydWrVyVTpkxSoUIF2bFjh+TOndvcPnDgQLl//750797ddIvpIOy1a9dKqlSpXI8xfvx4M3W/VatWpmzNmjVl5syZkjhx4jh8ZQAAwJ8EOHSEMZ5KW6a0xUkHWPt6PFGewf93sDhi36nRDTnMALzC7+j4/fs5pt/f8WoMEQAAQGwgEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWM+vF2YEAESN9XEA36GFCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKyXxPojAMDn8gxeyVEFEK/QQgQAAKxHIAIAANYjEAEAAOsRiAAAgPUYVA2rMNgXABAZWogAAID1CEQAAMB6BCIAAGA9AhEAALAegQgAAFiPQAQAAKxHIAIAANYjEAEAAOsRiAAAgPUIRAAAwHoEIgAAYD0CEQAAsB6BCAAAWI9ABAAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAelYFoi+//FJCQkIkWbJkUqZMGfnPf/4T11UCAAB+wJpA9PXXX0ufPn1k6NChsm/fPqlSpYrUr19fzpw5E9dVAwAAccyaQDRu3Djp1KmTdO7cWQoVKiQTJkyQnDlzyuTJk+O6agAAII5ZEYgePnwoe/fulTp16rjt1+vbtm2Ls3oBAAD/kEQscPXqVQkLC5MsWbK47dfrFy9ejPQ+Dx48MJvTzZs3zc9bt275vH5PHtzz+WMCABCf3IqF79fwj+twOKItZ0UgcgoICHC7rgfHc5/TqFGjZMSIERH2azcbAADwrTQTJFbdvn1b0qRJY3cgypgxoyROnDhCa9Dly5cjtBo5DRkyRPr27eu6/uTJE/njjz8kQ4YMUYYoW2ja1mB49uxZSZ06dVxXJ0HjWHOcExI+zxznuKCNHxqGsmfPHm05KwJRYGCgmWa/bt06ad68uWu/Xm/atGmk9wkKCjJbeGnTpo31usYnGoYIRBzrhITPNMc5IeHz/P9F1zJkVSBS2trTrl07KVu2rFSsWFG++uorM+X+rbfeiuuqAQCAOGZNIGrdurVcu3ZNPvjgA7lw4YIULVpUvv/+e8mdO3dcVw0AAMQxawKR6t69u9nwfLQrcfjw4RG6FOF7HOu/BseZ45yQ8Hl+NgGOp81DAwAASOCsWJgRAAAgOgQiAABgPQIRAACwHoEIAABYj0AEr/z444/SuHFjs+Knrti9ZMkSjqCP6WljXnrpJUmVKpVkzpxZmjVrJkePHuU4+9jkyZOlePHirsXrdH2yVatWcZz/gs+3/u7o06cPx9rHQkNDzbENv2XNmpXjHEMEInjl7t27UqJECZk0aRJHLpZs3rxZevToITt27DCrqT9+/Fjq1Kljjj18J0eOHDJ69GjZs2eP2V555RWzcv3hw4c5zLFk9+7dZlFcDaKIHUWKFDFr7Tm3gwcPcqhjyKp1iPD86tevbzbEntWrV7tdnzFjhmkp2rt3r1StWpVD7yPa0hneRx99ZFqNNIjqlwp8686dO/L666/L1KlT5cMPP+TwxpIkSZLQKvSMaCEC/
NzNmzfNz/Tp08d1VRKssLAwWbhwoWmF064z+J62ejZs2FBq1arF4Y1Fx44dM0MaQkJCpE2bNnLixAmOdwzRQgT4MV03Vc/DV7lyZXO6GfiWdidoAPrzzz8lZcqUsnjxYilcuDCH2cc0bP7000+mywyxp3z58jJ79mwpUKCAXLp0ybTEVapUyXQDZ8iQgUP/FAQiwI/17NlTDhw4IFu2bInrqiRIBQsWlP3798uNGzfku+++k/bt25sxXIQi3zl79qy8/fbbsnbtWkmWLJkPHxmewg9nKFasmAn7efPmlVmzZpk/rBA9AhHgp3r16iXLli0zM/t0ADB8LzAwUPLly2culy1b1rRgfPbZZzJlyhQOt4/o2LfLly9LmTJl3Loo9XOtkzMePHggiRMn5njHguDgYBOMtBsNT0cgAvywm0zDkHbfbNq0yYwFwF937PULGr5Ts2bNCDOdOnbsKC+++KIMGjSIMBSL9LN85MgRqVKlSmw+TYJBIILXM0WOHz/uun7y5EnT5aADfnPlysXR9NHg0/nz58vSpUvNWkQXL140+9OkSSPJkyfnGPvIu+++a7oYcubMKbdv3zbjXDSAes7yw/PRz7Dn+DdtudAxLYyL863+/fub2ZP6u1hb5XQM0a1bt0xXMJ6OQASv6HotNWrUcF139kvrf7iZM2dyNH1Ap36r6tWrR5h+36FDB46xj+ig03bt2pm1WjRs6to4GoZq167NMUa8dO7cOXnttdfk6tWrkilTJqlQoYJZRiJ37txxXbV4IcChbcQAAAAWYx0iAABgPQIRAACwHoEIAABYj0AEAACsRyACAADWIxABAADrEYgAAID1CEQA8P+cOnVKAgICzOrrAOxCIAIQ7+iK3RpcdEuSJIk5VcH//u//yvXr1716jGbNmrnt09N46MrVnFICsA+BCEC8VK9ePRNetFVn2rRpsnz5cunevftzPaaedT1r1qwmZAGwC4EIQLwUFBRkwkuOHDmkTp060rp1a1m7dq25LSwsTDp16iQhISHmhLgFCxaUzz77zHXf0NBQmTVrljmBrrOlSU/s6tllpvv0+oYNG6Rs2bKSIkUKqVSpkhw9etStLnoSzcyZM5sTmXbu3FkGDx4sJUuW/IuPCIDnQSACEO+dOHHCnJg1adKk5vqTJ09MUPr3v/8tP//8swwbNsyc3V6vO88K3qpVK1crk24adKIydOhQ+fTTT83JjbX16M0333TdNm/ePPnoo49kzJgxsnfvXtN95zxBL4D4g3ZhAPHSihUrJGXKlKY16M8//zT7xo0bZ35qMBoxYoSrrLYUbdu2zQQiDUJ6P205evDggWllehoNPNWqVTOXtfWnYcOG5jmTJUsmEydONK1RHTt2NLdr+NKWqjt37sTSKwcQG2ghAhAv1ahRw3Rt7dy5U3r16iV169Y1P53++c9/mm6uTJkymQA0depUOXPmzDM9V/HixV2Xs2XLZn5evnzZ/NTus3LlyrmV97wOwP8RiADES8HBwZIvXz4TVj7//HPT2uNsFdKWoHfeecd0bWlrjQYnbcF5+PDhMz2XsytO6ZgiZ7ec5z4nh8PxjK8KQFwhEAFIEIYPHy6ffPKJnD9/Xv7zn/+YMUE666xUqVImOP32229u5QMDA0132/PSAdu7du1y26djjQDELwQiAAlC9erVpUiRIjJy5EgTgDSUrFmzRn799Vd5//33Zffu3W7l8+TJIwcOHDBdXlevXpVHjx490/NqN9306dPNrLVjx46ZGWf6uJ6tRgD8G4EIQILRt29fM1ZIF1xs0aKFmYpfvnx5uXbtWoQ1irp06WJad5zjjLZu3fpMz/n666/LkCFDzMy10qVLy8mTJ82ijzrgGkD8EeCgsxsAfKp27dpm9tqcOXM4skA8wbR7AHgO9+7dMzPadJabrnS9YMECWb9+vaxbt47jCsQjtBABwHO4f/++NG7cWH766Scz00274d577z3TZQcg/iAQAQAA6zGoGgAAWI9AB
AAArEcgAgAA1iMQAQAA6xGIAACA9QhEAADAegQiAABgPQIRAACwHoEIAACI7f4P+kBaQ+oEOacAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.figure()\n", + "plt.hist(df[\"rating\"], bins=range(int(df[\"rating\"].min()), int(df[\"rating\"].max()) + 2), align=\"left\")\n", + "plt.xlabel(\"Rating\")\n", + "plt.ylabel(\"Number of reviews\")\n", + "plt.title(\"Rating distribution (sampled)\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "multitag", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/uber_cleaned.ipynb b/notebooks/uber_cleaned.ipynb index 2430469..48cd3a7 100644 --- a/notebooks/uber_cleaned.ipynb +++ b/notebooks/uber_cleaned.ipynb @@ -1,5 +1,24 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "6318e936", + "metadata": {}, + "source": [ + "# Preprocessing Requirements\n", + "## RECLASS\n", + "\n", + "**Purpose**: Verify the output datasets.\n", + "\n", + "- `uber_reviews_cleaned.csv` - Full cleaned dataset\n", + "- `uber_reviews_sampled.csv` - tbc\n", + "- `uber_reviews_tagged.csv` - tbc ...Annotation progress\n", + "\n", + "**Dataset**: Uber Customer Reviews from Google Play (Kaggle)\n", + "\n", + "---" + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/src/multitag.py b/src/multitag.py index ded02f2..f862941 100644 --- a/src/multitag.py +++ b/src/multitag.py @@ -2,6 +2,8 @@ # This app enables manual annotation of reviews in the Uber dataset, for training with # to achieve review classifications with multi task deep learning +# With more time I would have defined many more tasks / classifications so that MTL could perform better (which would also require better labelling); +# at least that is my best guess as to
why this may not be as good as I wanted import tkinter as tk from tkinter import ttk import pandas as pd @@ -47,8 +49,8 @@ class MultiTag: self.color_complete = "#00AA00" # Paths - tagged_path = "multitag/data/uber_reviews_tagged.csv" - sampled_path = "multitag/data/uber_reviews_sampled.csv" + tagged_path = "data/uber_reviews_tagged.csv" + sampled_path = "data/uber_reviews_sampled.csv" # self.load_review_data("data/uber_reviews_sampled.csv") # self.load_review_data("data/uber_reviews_tagged.csv") if not os.path.exists(tagged_path): @@ -256,7 +258,7 @@ class MultiTag: def submit_tag(self): self.review_data.at[self.current_review_index, "tagged"] = 1 - self.save_tags("multitag/data/uber_reviews_tagged.csv") + self.save_tags("data/uber_reviews_tagged.csv") self.display_next_review() def try_submit(self, event): @@ -291,9 +293,9 @@ class MultiTag: print(f"SESSION COMPLETE") print(f"{'='*50}") print(f"Total tagged: {tagged_count} / {len(self.review_data)}") - print(f"Saved to: multitag/data/uber_reviews_tagged.csv") + print(f"Saved to: data/uber_reviews_tagged.csv") print(f"Bye (ʘ‿ʘ)╯") - self.save_tags("multitag/data/uber_reviews_tagged.csv") + self.save_tags("data/uber_reviews_tagged.csv") self.root.destroy() def get_current_review_index(self):