diff --git a/.gitignore b/.gitignore
index aa3e679..8054551 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 multitag/data/*.csv
 multitag/raw_data/
+multitag/.ipynb_checkpoints
+multitag/.vscode
diff --git a/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb b/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb
deleted file mode 100644
index 910a184..0000000
--- a/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb
+++ /dev/null
@@ -1,1471 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "csv0 = pd.read_csv(\"spotify.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "c0631560-c1be-4bbf-b050-b6a552e74d63",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Time_submitted</th>\n",
-       "      <th>Review</th>\n",
-       "      <th>Rating</th>\n",
-       "      <th>Total_thumbsup</th>\n",
-       "      <th>Reply</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>2022-07-09 15:00:00</td>\n",
-       "      <td>Great music service, the audio is high quality...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2022-07-09 14:21:22</td>\n",
-       "      <td>Please ignore previous negative rating. This a...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2022-07-09 13:27:32</td>\n",
-       "      <td>This pop-up \"Get the best Spotify experience o...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>2022-07-09 13:26:45</td>\n",
-       "      <td>Really buggy and terrible to use as of recently</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>2022-07-09 13:20:49</td>\n",
-       "      <td>Dear Spotify why do I get songs that I didn't ...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        Time_submitted                                             Review  \\\n",
-       "0  2022-07-09 15:00:00  Great music service, the audio is high quality...   \n",
-       "1  2022-07-09 14:21:22  Please ignore previous negative rating. This a...   \n",
-       "2  2022-07-09 13:27:32  This pop-up \"Get the best Spotify experience o...   \n",
-       "3  2022-07-09 13:26:45    Really buggy and terrible to use as of recently   \n",
-       "4  2022-07-09 13:20:49  Dear Spotify why do I get songs that I didn't ...   \n",
-       "\n",
-       "   Rating  Total_thumbsup Reply  \n",
-       "0       5               2   NaN  \n",
-       "1       5               1   NaN  \n",
-       "2       4               0   NaN  \n",
-       "3       1               1   NaN  \n",
-       "4       1               1   NaN  "
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv0.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Time_submitted</th>\n",
-       "      <th>Review</th>\n",
-       "      <th>Rating</th>\n",
-       "      <th>Total_thumbsup</th>\n",
-       "      <th>Reply</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>61589</th>\n",
-       "      <td>2022-01-01 03:01:29</td>\n",
-       "      <td>Even though it was communicated that lyrics fe...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>6</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>61590</th>\n",
-       "      <td>2022-01-01 02:13:40</td>\n",
-       "      <td>Use to be sooo good back when I had it, and wh...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>61591</th>\n",
-       "      <td>2022-01-01 01:02:29</td>\n",
-       "      <td>This app would be good if not for it taking ov...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>10</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>61592</th>\n",
-       "      <td>2022-01-01 00:49:23</td>\n",
-       "      <td>The app is good hard to navigate and won't jus...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>61593</th>\n",
-       "      <td>2022-01-01 00:19:09</td>\n",
-       "      <td>Its good but sometimes it doesnt load the musi...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "            Time_submitted                                             Review  \\\n",
-       "61589  2022-01-01 03:01:29  Even though it was communicated that lyrics fe...   \n",
-       "61590  2022-01-01 02:13:40  Use to be sooo good back when I had it, and wh...   \n",
-       "61591  2022-01-01 01:02:29  This app would be good if not for it taking ov...   \n",
-       "61592  2022-01-01 00:49:23  The app is good hard to navigate and won't jus...   \n",
-       "61593  2022-01-01 00:19:09  Its good but sometimes it doesnt load the musi...   \n",
-       "\n",
-       "       Rating  Total_thumbsup Reply  \n",
-       "61589       1               6   NaN  \n",
-       "61590       1               0   NaN  \n",
-       "61591       2              10   NaN  \n",
-       "61592       2               1   NaN  \n",
-       "61593       4               0   NaN  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv0.tail()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "csv1 = pd.read_csv(\"Airbnb_Open_Data.csv\", low_memory=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "1efba903-5004-4d7b-a1ee-42f333111055",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>host id</th>\n",
-       "      <th>host_identity_verified</th>\n",
-       "      <th>host name</th>\n",
-       "      <th>neighbourhood group</th>\n",
-       "      <th>neighbourhood</th>\n",
-       "      <th>lat</th>\n",
-       "      <th>long</th>\n",
-       "      <th>country</th>\n",
-       "      <th>...</th>\n",
-       "      <th>service fee</th>\n",
-       "      <th>minimum nights</th>\n",
-       "      <th>number of reviews</th>\n",
-       "      <th>last review</th>\n",
-       "      <th>reviews per month</th>\n",
-       "      <th>review rate number</th>\n",
-       "      <th>calculated host listings count</th>\n",
-       "      <th>availability 365</th>\n",
-       "      <th>house_rules</th>\n",
-       "      <th>license</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1001254</td>\n",
-       "      <td>Clean &amp; quiet apt home by the park</td>\n",
-       "      <td>80014485718</td>\n",
-       "      <td>unconfirmed</td>\n",
-       "      <td>Madaline</td>\n",
-       "      <td>Brooklyn</td>\n",
-       "      <td>Kensington</td>\n",
-       "      <td>40.64749</td>\n",
-       "      <td>-73.97237</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$193</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>10/19/2021</td>\n",
-       "      <td>0.21</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>286.0</td>\n",
-       "      <td>Clean up and treat the home the way you'd like...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1002102</td>\n",
-       "      <td>Skylit Midtown Castle</td>\n",
-       "      <td>52335172823</td>\n",
-       "      <td>verified</td>\n",
-       "      <td>Jenna</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>Midtown</td>\n",
-       "      <td>40.75362</td>\n",
-       "      <td>-73.98377</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$28</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>45.0</td>\n",
-       "      <td>5/21/2022</td>\n",
-       "      <td>0.38</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>228.0</td>\n",
-       "      <td>Pet friendly but please confirm with me if the...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1002403</td>\n",
-       "      <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
-       "      <td>78829239556</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Elise</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>Harlem</td>\n",
-       "      <td>40.80902</td>\n",
-       "      <td>-73.94190</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$124</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>352.0</td>\n",
-       "      <td>I encourage you to use my kitchen, cooking and...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1002755</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>85098326012</td>\n",
-       "      <td>unconfirmed</td>\n",
-       "      <td>Garry</td>\n",
-       "      <td>Brooklyn</td>\n",
-       "      <td>Clinton Hill</td>\n",
-       "      <td>40.68514</td>\n",
-       "      <td>-73.95976</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$74</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>270.0</td>\n",
-       "      <td>7/5/2019</td>\n",
-       "      <td>4.64</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>322.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1003689</td>\n",
-       "      <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
-       "      <td>92037596077</td>\n",
-       "      <td>verified</td>\n",
-       "      <td>Lyndon</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>East Harlem</td>\n",
-       "      <td>40.79851</td>\n",
-       "      <td>-73.94399</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$41</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>11/19/2018</td>\n",
-       "      <td>0.10</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>289.0</td>\n",
-       "      <td>Please no smoking in the house, porch or on th...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 26 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        id                                              NAME      host id  \\\n",
-       "0  1001254                Clean & quiet apt home by the park  80014485718   \n",
-       "1  1002102                             Skylit Midtown Castle  52335172823   \n",
-       "2  1002403               THE VILLAGE OF HARLEM....NEW YORK !  78829239556   \n",
-       "3  1002755                                               NaN  85098326012   \n",
-       "4  1003689  Entire Apt: Spacious Studio/Loft by central park  92037596077   \n",
-       "\n",
-       "  host_identity_verified host name neighbourhood group neighbourhood  \\\n",
-       "0            unconfirmed  Madaline            Brooklyn    Kensington   \n",
-       "1               verified     Jenna           Manhattan       Midtown   \n",
-       "2                    NaN     Elise           Manhattan        Harlem   \n",
-       "3            unconfirmed     Garry            Brooklyn  Clinton Hill   \n",
-       "4               verified    Lyndon           Manhattan   East Harlem   \n",
-       "\n",
-       "        lat      long        country  ... service fee minimum nights  \\\n",
-       "0  40.64749 -73.97237  United States  ...       $193            10.0   \n",
-       "1  40.75362 -73.98377  United States  ...        $28            30.0   \n",
-       "2  40.80902 -73.94190  United States  ...       $124             3.0   \n",
-       "3  40.68514 -73.95976  United States  ...        $74            30.0   \n",
-       "4  40.79851 -73.94399  United States  ...        $41            10.0   \n",
-       "\n",
-       "  number of reviews last review  reviews per month review rate number  \\\n",
-       "0               9.0  10/19/2021               0.21                4.0   \n",
-       "1              45.0   5/21/2022               0.38                4.0   \n",
-       "2               0.0         NaN                NaN                5.0   \n",
-       "3             270.0    7/5/2019               4.64                4.0   \n",
-       "4               9.0  11/19/2018               0.10                3.0   \n",
-       "\n",
-       "  calculated host listings count  availability 365  \\\n",
-       "0                            6.0             286.0   \n",
-       "1                            2.0             228.0   \n",
-       "2                            1.0             352.0   \n",
-       "3                            1.0             322.0   \n",
-       "4                            1.0             289.0   \n",
-       "\n",
-       "                                         house_rules license  \n",
-       "0  Clean up and treat the home the way you'd like...     NaN  \n",
-       "1  Pet friendly but please confirm with me if the...     NaN  \n",
-       "2  I encourage you to use my kitchen, cooking and...     NaN  \n",
-       "3                                                NaN     NaN  \n",
-       "4  Please no smoking in the house, porch or on th...     NaN  \n",
-       "\n",
-       "[5 rows x 26 columns]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv1.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "39d543be-013a-4976-942d-f9884274c7be",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>host id</th>\n",
-       "      <th>host_identity_verified</th>\n",
-       "      <th>host name</th>\n",
-       "      <th>neighbourhood group</th>\n",
-       "      <th>neighbourhood</th>\n",
-       "      <th>lat</th>\n",
-       "      <th>long</th>\n",
-       "      <th>country</th>\n",
-       "      <th>...</th>\n",
-       "      <th>service fee</th>\n",
-       "      <th>minimum nights</th>\n",
-       "      <th>number of reviews</th>\n",
-       "      <th>last review</th>\n",
-       "      <th>reviews per month</th>\n",
-       "      <th>review rate number</th>\n",
-       "      <th>calculated host listings count</th>\n",
-       "      <th>availability 365</th>\n",
-       "      <th>house_rules</th>\n",
-       "      <th>license</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1001254</td>\n",
-       "      <td>Clean &amp; quiet apt home by the park</td>\n",
-       "      <td>80014485718</td>\n",
-       "      <td>unconfirmed</td>\n",
-       "      <td>Madaline</td>\n",
-       "      <td>Brooklyn</td>\n",
-       "      <td>Kensington</td>\n",
-       "      <td>40.64749</td>\n",
-       "      <td>-73.97237</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$193</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>10/19/2021</td>\n",
-       "      <td>0.21</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>286.0</td>\n",
-       "      <td>Clean up and treat the home the way you'd like...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1002102</td>\n",
-       "      <td>Skylit Midtown Castle</td>\n",
-       "      <td>52335172823</td>\n",
-       "      <td>verified</td>\n",
-       "      <td>Jenna</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>Midtown</td>\n",
-       "      <td>40.75362</td>\n",
-       "      <td>-73.98377</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$28</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>45.0</td>\n",
-       "      <td>5/21/2022</td>\n",
-       "      <td>0.38</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>228.0</td>\n",
-       "      <td>Pet friendly but please confirm with me if the...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1002403</td>\n",
-       "      <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
-       "      <td>78829239556</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Elise</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>Harlem</td>\n",
-       "      <td>40.80902</td>\n",
-       "      <td>-73.94190</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$124</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>352.0</td>\n",
-       "      <td>I encourage you to use my kitchen, cooking and...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1002755</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>85098326012</td>\n",
-       "      <td>unconfirmed</td>\n",
-       "      <td>Garry</td>\n",
-       "      <td>Brooklyn</td>\n",
-       "      <td>Clinton Hill</td>\n",
-       "      <td>40.68514</td>\n",
-       "      <td>-73.95976</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$74</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>270.0</td>\n",
-       "      <td>7/5/2019</td>\n",
-       "      <td>4.64</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>322.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1003689</td>\n",
-       "      <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
-       "      <td>92037596077</td>\n",
-       "      <td>verified</td>\n",
-       "      <td>Lyndon</td>\n",
-       "      <td>Manhattan</td>\n",
-       "      <td>East Harlem</td>\n",
-       "      <td>40.79851</td>\n",
-       "      <td>-73.94399</td>\n",
-       "      <td>United States</td>\n",
-       "      <td>...</td>\n",
-       "      <td>$41</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>11/19/2018</td>\n",
-       "      <td>0.10</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>289.0</td>\n",
-       "      <td>Please no smoking in the house, porch or on th...</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 26 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        id                                              NAME      host id  \\\n",
-       "0  1001254                Clean & quiet apt home by the park  80014485718   \n",
-       "1  1002102                             Skylit Midtown Castle  52335172823   \n",
-       "2  1002403               THE VILLAGE OF HARLEM....NEW YORK !  78829239556   \n",
-       "3  1002755                                               NaN  85098326012   \n",
-       "4  1003689  Entire Apt: Spacious Studio/Loft by central park  92037596077   \n",
-       "\n",
-       "  host_identity_verified host name neighbourhood group neighbourhood  \\\n",
-       "0            unconfirmed  Madaline            Brooklyn    Kensington   \n",
-       "1               verified     Jenna           Manhattan       Midtown   \n",
-       "2                    NaN     Elise           Manhattan        Harlem   \n",
-       "3            unconfirmed     Garry            Brooklyn  Clinton Hill   \n",
-       "4               verified    Lyndon           Manhattan   East Harlem   \n",
-       "\n",
-       "        lat      long        country  ... service fee minimum nights  \\\n",
-       "0  40.64749 -73.97237  United States  ...       $193            10.0   \n",
-       "1  40.75362 -73.98377  United States  ...        $28            30.0   \n",
-       "2  40.80902 -73.94190  United States  ...       $124             3.0   \n",
-       "3  40.68514 -73.95976  United States  ...        $74            30.0   \n",
-       "4  40.79851 -73.94399  United States  ...        $41            10.0   \n",
-       "\n",
-       "  number of reviews last review  reviews per month review rate number  \\\n",
-       "0               9.0  10/19/2021               0.21                4.0   \n",
-       "1              45.0   5/21/2022               0.38                4.0   \n",
-       "2               0.0         NaN                NaN                5.0   \n",
-       "3             270.0    7/5/2019               4.64                4.0   \n",
-       "4               9.0  11/19/2018               0.10                3.0   \n",
-       "\n",
-       "  calculated host listings count  availability 365  \\\n",
-       "0                            6.0             286.0   \n",
-       "1                            2.0             228.0   \n",
-       "2                            1.0             352.0   \n",
-       "3                            1.0             322.0   \n",
-       "4                            1.0             289.0   \n",
-       "\n",
-       "                                         house_rules license  \n",
-       "0  Clean up and treat the home the way you'd like...     NaN  \n",
-       "1  Pet friendly but please confirm with me if the...     NaN  \n",
-       "2  I encourage you to use my kitchen, cooking and...     NaN  \n",
-       "3                                                NaN     NaN  \n",
-       "4  Please no smoking in the house, porch or on th...     NaN  \n",
-       "\n",
-       "[5 rows x 26 columns]"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv1.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "95f93b29-94be-4c93-9793-cf51c2ba2442",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "csv02 = pd.read_csv(\"WAZE_REVIEWS.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Unnamed: 0</th>\n",
-       "      <th>review_id</th>\n",
-       "      <th>pseudo_author_id</th>\n",
-       "      <th>author_name</th>\n",
-       "      <th>review_text</th>\n",
-       "      <th>review_rating</th>\n",
-       "      <th>review_likes</th>\n",
-       "      <th>author_app_version</th>\n",
-       "      <th>review_timestamp</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>6caba53d-789d-4733-bad5-c7491daf80f2</td>\n",
-       "      <td>152618553977019693742</td>\n",
-       "      <td>A Google user</td>\n",
-       "      <td>Nice app need to add red light cam.</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.99.2.3</td>\n",
-       "      <td>2009-06-30 16:48:15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>30c15838-8b02-4dae-8f51-25905cb40b68</td>\n",
-       "      <td>234382942865437071667</td>\n",
-       "      <td>A Google user</td>\n",
-       "      <td>Really cool social app. Lots of potential to b...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.99.2.3</td>\n",
-       "      <td>2009-06-30 16:58:43</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>c090400e-f88f-4129-930d-a650f3163a11</td>\n",
-       "      <td>174473604608358796368</td>\n",
-       "      <td>A Google user</td>\n",
-       "      <td>I was all excited about this app (ehat a great...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.99.2.3</td>\n",
-       "      <td>2009-06-30 17:08:33</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>3</td>\n",
-       "      <td>f6f37456-793b-4786-af6e-454a811361bf</td>\n",
-       "      <td>286593453219054880269</td>\n",
-       "      <td>A Google user</td>\n",
-       "      <td>I love this app! Lol</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.99.2.3</td>\n",
-       "      <td>2009-06-30 17:37:22</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>4</td>\n",
-       "      <td>8ae5d962-7c0c-476d-82fa-79f6e5484acc</td>\n",
-       "      <td>167276875678680630145</td>\n",
-       "      <td>A Google user</td>\n",
-       "      <td>Great app i like the idea of your car being pa...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.99.2.3</td>\n",
-       "      <td>2009-06-30 23:58:43</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Unnamed: 0                             review_id       pseudo_author_id  \\\n",
-       "0           0  6caba53d-789d-4733-bad5-c7491daf80f2  152618553977019693742   \n",
-       "1           1  30c15838-8b02-4dae-8f51-25905cb40b68  234382942865437071667   \n",
-       "2           2  c090400e-f88f-4129-930d-a650f3163a11  174473604608358796368   \n",
-       "3           3  f6f37456-793b-4786-af6e-454a811361bf  286593453219054880269   \n",
-       "4           4  8ae5d962-7c0c-476d-82fa-79f6e5484acc  167276875678680630145   \n",
-       "\n",
-       "     author_name                                        review_text  \\\n",
-       "0  A Google user                Nice app need to add red light cam.   \n",
-       "1  A Google user  Really cool social app. Lots of potential to b...   \n",
-       "2  A Google user  I was all excited about this app (ehat a great...   \n",
-       "3  A Google user                               I love this app! Lol   \n",
-       "4  A Google user  Great app i like the idea of your car being pa...   \n",
-       "\n",
-       "   review_rating  review_likes author_app_version     review_timestamp  \n",
-       "0              5             0           0.99.2.3  2009-06-30 16:48:15  \n",
-       "1              5             0           0.99.2.3  2009-06-30 16:58:43  \n",
-       "2              1             0           0.99.2.3  2009-06-30 17:08:33  \n",
-       "3              5             0           0.99.2.3  2009-06-30 17:37:22  \n",
-       "4              4             0           0.99.2.3  2009-06-30 23:58:43  "
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv02.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Unnamed: 0</th>\n",
-       "      <th>review_id</th>\n",
-       "      <th>pseudo_author_id</th>\n",
-       "      <th>author_name</th>\n",
-       "      <th>review_text</th>\n",
-       "      <th>review_rating</th>\n",
-       "      <th>review_likes</th>\n",
-       "      <th>author_app_version</th>\n",
-       "      <th>review_timestamp</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>780068</th>\n",
-       "      <td>780068</td>\n",
-       "      <td>01655504-5a51-4c19-b313-2bd5fa3f253a</td>\n",
-       "      <td>680743620884748258838</td>\n",
-       "      <td>Ma********ll</td>\n",
-       "      <td>Freezes</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2023-11-17 03:18:26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>780069</th>\n",
-       "      <td>780069</td>\n",
-       "      <td>f04306cb-af60-4a44-aebc-c37122620319</td>\n",
-       "      <td>266638684561117704682</td>\n",
-       "      <td>Zu******el</td>\n",
-       "      <td>To stuck</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2023-11-17 03:18:38</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>780070</th>\n",
-       "      <td>780070</td>\n",
-       "      <td>894e3c41-ca20-4781-9308-70eeb060a865</td>\n",
-       "      <td>154572309081670894420</td>\n",
-       "      <td>br**********ji</td>\n",
-       "      <td>racist made app</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>4.99.0.2</td>\n",
-       "      <td>2023-11-17 03:23:20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>780071</th>\n",
-       "      <td>780071</td>\n",
-       "      <td>4fafb0b1-485e-473e-9bcd-d5c9848424d2</td>\n",
-       "      <td>154995071911163107981</td>\n",
-       "      <td>Mo***********da</td>\n",
-       "      <td>بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>4.99.1.1</td>\n",
-       "      <td>2023-11-17 04:05:02</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>780072</th>\n",
-       "      <td>780072</td>\n",
-       "      <td>d1570ba0-ffc5-4fc6-8d34-12daba4b38e2</td>\n",
-       "      <td>200574835524973617311</td>\n",
-       "      <td>Re***********iz</td>\n",
-       "      <td>Best app ever used.</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>4.99.0.2</td>\n",
-       "      <td>2023-11-17 04:06:44</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        Unnamed: 0                             review_id  \\\n",
-       "780068      780068  01655504-5a51-4c19-b313-2bd5fa3f253a   \n",
-       "780069      780069  f04306cb-af60-4a44-aebc-c37122620319   \n",
-       "780070      780070  894e3c41-ca20-4781-9308-70eeb060a865   \n",
-       "780071      780071  4fafb0b1-485e-473e-9bcd-d5c9848424d2   \n",
-       "780072      780072  d1570ba0-ffc5-4fc6-8d34-12daba4b38e2   \n",
-       "\n",
-       "             pseudo_author_id      author_name  \\\n",
-       "780068  680743620884748258838     Ma********ll   \n",
-       "780069  266638684561117704682       Zu******el   \n",
-       "780070  154572309081670894420   br**********ji   \n",
-       "780071  154995071911163107981  Mo***********da   \n",
-       "780072  200574835524973617311  Re***********iz   \n",
-       "\n",
-       "                                              review_text  review_rating  \\\n",
-       "780068                                            Freezes              3   \n",
-       "780069                                           To stuck              1   \n",
-       "780070                                    racist made app              1   \n",
-       "780071  بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...              5   \n",
-       "780072                                Best app ever used.              5   \n",
-       "\n",
-       "        review_likes author_app_version     review_timestamp  \n",
-       "780068             0                NaN  2023-11-17 03:18:26  \n",
-       "780069             0                NaN  2023-11-17 03:18:38  \n",
-       "780070             0           4.99.0.2  2023-11-17 03:23:20  \n",
-       "780071             0           4.99.1.1  2023-11-17 04:05:02  \n",
-       "780072             0           4.99.0.2  2023-11-17 04:06:44  "
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "csv02.tail()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>source</th>\n",
-       "      <th>review_id</th>\n",
-       "      <th>user_name</th>\n",
-       "      <th>review_title</th>\n",
-       "      <th>review_description</th>\n",
-       "      <th>rating</th>\n",
-       "      <th>thumbs_up</th>\n",
-       "      <th>review_date</th>\n",
-       "      <th>developer_response</th>\n",
-       "      <th>developer_response_date</th>\n",
-       "      <th>appVersion</th>\n",
-       "      <th>laguage_code</th>\n",
-       "      <th>country_code</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>18d6584c-d0e9-4833-a744-f607058aee97</td>\n",
-       "      <td>Milky Way</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Suddenly, the driver can't have my location an...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 17:48:51</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>50a08f18-cece-4ddf-b617-028844c8aa28</td>\n",
-       "      <td>Bradlee Severa</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Very cordial.. And helped with a quick turnaro...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 17:38:35</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.485.10000</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7</td>\n",
-       "      <td>Amit Aggarwal</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Very good experience</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 17:38:17</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.486.10002</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>502702a9-25ed-4373-a96c-7fa1f06caacd</td>\n",
-       "      <td>Bryant Inman</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>All I use</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 17:37:45</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.467.10008</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>f47a3fb6-23db-49bd-9e63-f33c8d724d07</td>\n",
-       "      <td>Addie Whittaker</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>I have enjoyed traveling by Uber my drivers ha...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 17:36:56</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.486.10002</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        source                             review_id        user_name  \\\n",
-       "0  Google Play  18d6584c-d0e9-4833-a744-f607058aee97        Milky Way   \n",
-       "1  Google Play  50a08f18-cece-4ddf-b617-028844c8aa28   Bradlee Severa   \n",
-       "2  Google Play  b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7    Amit Aggarwal   \n",
-       "3  Google Play  502702a9-25ed-4373-a96c-7fa1f06caacd     Bryant Inman   \n",
-       "4  Google Play  f47a3fb6-23db-49bd-9e63-f33c8d724d07  Addie Whittaker   \n",
-       "\n",
-       "  review_title                                 review_description  rating  \\\n",
-       "0          NaN  Suddenly, the driver can't have my location an...       1   \n",
-       "1          NaN  Very cordial.. And helped with a quick turnaro...       5   \n",
-       "2          NaN                               Very good experience       5   \n",
-       "3          NaN                                          All I use       5   \n",
-       "4          NaN  I have enjoyed traveling by Uber my drivers ha...       5   \n",
-       "\n",
-       "   thumbs_up          review_date developer_response developer_response_date  \\\n",
-       "0        0.0  2023-08-10 17:48:51                NaN                     NaN   \n",
-       "1        0.0  2023-08-10 17:38:35                NaN                     NaN   \n",
-       "2        0.0  2023-08-10 17:38:17                NaN                     NaN   \n",
-       "3        0.0  2023-08-10 17:37:45                NaN                     NaN   \n",
-       "4        0.0  2023-08-10 17:36:56                NaN                     NaN   \n",
-       "\n",
-       "    appVersion laguage_code country_code  \n",
-       "0          NaN           en           in  \n",
-       "1  4.485.10000           en           in  \n",
-       "2  4.486.10002           en           in  \n",
-       "3  4.467.10008           en           in  \n",
-       "4  4.486.10002           en           in  "
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "uber = pd.read_csv(\"Uber Customer Reviews.csv\", low_memory=False)\n",
-    "uber.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "d4ace5a2-346a-4099-9854-1cac2749a216",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1069616, 13)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(np.shape(uber))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>source</th>\n",
-       "      <th>review_id</th>\n",
-       "      <th>user_name</th>\n",
-       "      <th>review_title</th>\n",
-       "      <th>review_description</th>\n",
-       "      <th>rating</th>\n",
-       "      <th>thumbs_up</th>\n",
-       "      <th>review_date</th>\n",
-       "      <th>developer_response</th>\n",
-       "      <th>developer_response_date</th>\n",
-       "      <th>appVersion</th>\n",
-       "      <th>laguage_code</th>\n",
-       "      <th>country_code</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>fbc7ffc9-5a89-446e-87fd-d69bf4a7f984</td>\n",
-       "      <td>Puipuii Ralte</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>The map in Ola is so messed up, i have to pay ...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 16:40:50</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>6.3.2</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>5a0051fb-220a-45b2-ba94-a15a2949218f</td>\n",
-       "      <td>Deepak Kumar</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Deepak Kumar.... 🙏🙏🙏🙏🙏]</td>\n",
-       "      <td>5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 16:36:14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>71ebf933-b734-474d-bb65-a18c90906ed2</td>\n",
-       "      <td>Ahamed Azarudeen</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Such aa irresponsible app more then I waiting ...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 16:29:31</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>6.3.1</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>e1cc0010-60b3-4126-99c2-e8549088566a</td>\n",
-       "      <td>Rahil Syed</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Worst</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 15:52:06</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>5.0.4</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Google Play</td>\n",
-       "      <td>77cf1be1-b428-4493-ae25-e0f288f79b8f</td>\n",
-       "      <td>vin 007</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Too much expensive .. try UBer... They are pro...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2023-08-10 15:51:10</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>en</td>\n",
-       "      <td>in</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        source                             review_id         user_name  \\\n",
-       "0  Google Play  fbc7ffc9-5a89-446e-87fd-d69bf4a7f984     Puipuii Ralte   \n",
-       "1  Google Play  5a0051fb-220a-45b2-ba94-a15a2949218f      Deepak Kumar   \n",
-       "2  Google Play  71ebf933-b734-474d-bb65-a18c90906ed2  Ahamed Azarudeen   \n",
-       "3  Google Play  e1cc0010-60b3-4126-99c2-e8549088566a        Rahil Syed   \n",
-       "4  Google Play  77cf1be1-b428-4493-ae25-e0f288f79b8f           vin 007   \n",
-       "\n",
-       "  review_title                                 review_description  rating  \\\n",
-       "0          NaN  The map in Ola is so messed up, i have to pay ...       1   \n",
-       "1          NaN                            Deepak Kumar.... 🙏🙏🙏🙏🙏]       5   \n",
-       "2          NaN  Such aa irresponsible app more then I waiting ...       1   \n",
-       "3          NaN                                              Worst       1   \n",
-       "4          NaN  Too much expensive .. try UBer... They are pro...       1   \n",
-       "\n",
-       "   thumbs_up          review_date developer_response developer_response_date  \\\n",
-       "0        0.0  2023-08-10 16:40:50                NaN                     NaN   \n",
-       "1        0.0  2023-08-10 16:36:14                NaN                     NaN   \n",
-       "2        0.0  2023-08-10 16:29:31                NaN                     NaN   \n",
-       "3        0.0  2023-08-10 15:52:06                NaN                     NaN   \n",
-       "4        0.0  2023-08-10 15:51:10                NaN                     NaN   \n",
-       "\n",
-       "  appVersion laguage_code country_code  \n",
-       "0      6.3.2           en           in  \n",
-       "1        NaN           en           in  \n",
-       "2      6.3.1           en           in  \n",
-       "3      5.0.4           en           in  \n",
-       "4        NaN           en           in  "
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ola_df = pd.read_csv(\"Ola Customer Reviews.csv\", low_memory=False)\n",
-    "ola_df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "878a39c4-45d5-41d6-82b0-9c373c28e280",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "count    357678.000000\n",
-      "mean         92.402697\n",
-      "std         125.489169\n",
-      "min           1.000000\n",
-      "25%           8.000000\n",
-      "50%          33.000000\n",
-      "75%         131.000000\n",
-      "max        2877.000000\n",
-      "Name: review_length, dtype: float64\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check average review length\n",
-    "ola_df['review_length'] = ola_df['review_description'].str.len()\n",
-    "print(ola_df['review_length'].describe())\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "id": "1dd032ba-343b-4402-9d96-ee5e0432ab07",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Substantive reviews: 204715\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Filter out very short reviews\n",
-    "substantive_reviews = ola_df[ola_df['review_length'] > 20]\n",
-    "print(f\"Substantive reviews: {len(substantive_reviews)}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "count    1.069447e+06\n",
-      "mean     7.023987e+01\n",
-      "std      1.158196e+02\n",
-      "min      1.000000e+00\n",
-      "25%      8.000000e+00\n",
-      "50%      2.100000e+01\n",
-      "75%      7.800000e+01\n",
-      "max      3.792000e+03\n",
-      "Name: review_length, dtype: float64\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check average review length\n",
-    "uber['review_length'] = uber['review_description'].str.len()\n",
-    "print(uber['review_length'].describe())\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "2dd05939-e87c-443d-9012-e5f45cf64ff5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Substantive reviews: 542110\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Filter out very short reviews\n",
-    "substantive_reviews = uber[uber['review_length'] > 20]\n",
-    "print(f\"Substantive reviews: {len(substantive_reviews)}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/multitag/multitag.py b/multitag/multitag.py
index a728777..32aca56 100644
--- a/multitag/multitag.py
+++ b/multitag/multitag.py
@@ -1,3 +1,14 @@
+#   TODO:   Refactor, especially the expected column names (I jumped the gun when first making this, without sampling properly)
+#   TODO:   Add button labels and finalise the categories of aspects
+#   TODO:   Ensure persistent progress tracking is implemented before labelling
+#   TODO:   Finalise keybinds
+#   TODO:   Display progress    e.g. review 1020 of 5000
+#   TODO:   Validate saving progress
+#   TODO:   Loop instead of pressing enter
+#   TODO:   Autosave ? / confirm quit at least
+#   TODO:   More visual q's
+
+
 import tkinter as tk
 from tkinter import ttk
 import pandas as pd
diff --git a/multitag/preprocess.py b/multitag/preprocess.py
index 9167b1a..646378b 100644
--- a/multitag/preprocess.py
+++ b/multitag/preprocess.py
@@ -88,9 +88,12 @@ def preprocess_uber_reviews(input_path, output_path):
     df['word_count'] = df['review_clean'].str.split().str.len()
     
     # 5. Remove short reviews
-    review_length_limit = 5
-    print(f"\n4. Removing short reviews (< {review_length_limit})...")
-    print("   Rationale: Insufficient context for classification")
+    review_length_limit = 5     ### limit review length ###
+    print(f"\n4. Removing short reviews so reviews have better context / (usefulness) (< {review_length_limit})...") 
+    # 1 word reviews provide little to draw conclusions from and bloat the 
+    # dataset a lot, nearly 50% of reviews!
+
+    # display changes
     before = len(df)
     df = df[df['word_count'] >= review_length_limit]
     removed = before - len(df)
@@ -119,8 +122,10 @@ def preprocess_uber_reviews(input_path, output_path):
     print("PREPROCESSING COMPLETE")
     print("="*50)
     print(f"\nFinal dataset: {len(df_clean):,} reviews")
-    print(f"Data source: Indian Uber market (predominantly English)")
-    print(f"Quality filters: word_count >= 5, duplicates removed")
+    print(f"Quality filters: word_count >= 5, duplicates removed") 
+    # While this does remove some legitimate reviews that would be useful for classification,
+    # it also lets us find a higher total number of useful reviews. After comparing limits of 1, 2, 3, 4 and 5,
+    # a limit of 5 kept the most informative reviews without seeming excessive in data removal.
     
     print("\nRating distribution:")
     rating_dist = df_clean['rating'].value_counts().sort_index()
@@ -138,7 +143,7 @@ def preprocess_uber_reviews(input_path, output_path):
     print(f"  Short reviews: {df_clean[df_clean['word_count'] < 5]}")
     print(f"  Null values: {df_clean.isnull().sum().to_dict()}")
     print(f"  Duplicate reviews: {df_clean.duplicated(subset=['review']).sum()}")
-    # lang detection takes 5+ mins
+    # lang detection takes 5+ mins so leaving it commented for now 
     #df_clean['detected_lang'] = df_clean['review'].apply(detect_language)
     #print(f"  Detected languages:\n {df_clean['detected_lang'].value_counts( )}")
     
@@ -150,13 +155,13 @@ def preprocess_uber_reviews(input_path, output_path):
         if len(df_clean[df_clean['rating'] == rating]) > 0:
             sample = df_clean[df_clean['rating'] == rating].sample(min(2, len(df_clean[df_clean['rating'] == rating])))
             print(f"\n{rating} {"✭" * rating} REVIEWS:")
-            for idx, row in sample.iterrows():
+            for index, row in sample.iterrows():
                 print(f"  • ({row['word_count']} words) {row['review'][:100]}")
     
     # Note about language
     print("Language detection not applied due to unreliability on short")
-    print("informal text. Dataset is from the Indian market, labeled as English.")
-    print("Manual annotation phase will identify any non-English reviews. And put aside.")
+    print("informal text. The Uber Reviews Dataset is from the Indian market, labeled as English.")
+    print(" ...Manual annotation phase will identify any non-English reviews")
     
     return df_clean
 
diff --git a/multitag/sampler.py b/multitag/sampler.py
index a90a9a4..25a16ae 100644
--- a/multitag/sampler.py
+++ b/multitag/sampler.py
@@ -1,22 +1,45 @@
+#   TODO:   Fix get_stratified_sample(): replace the broken x() helper with actual working logic
+#   TODO:   Add verification comparison between ratings
+#   TODO:   Implement sample_with_keywords(): add to lists, then implement the logic
+#   TODO:   Clean up the logging print statements
+
+
 import pandas as pd
 import numpy as np
 
 print(pd.__version__)
 print(np.__version__)
 
-path = "data/uber_reviews.csv"
-sampled_path = "data/uber_reviews_sampled.csv"
+path = "multitag/data/uber_reviews_cleaned.csv"
+sampled_path = "multitag/data/uber_reviews_sampled.csv"
+original_path = "multitag/data/uber_reviews.csv" ### only for distribution comparison
 class Sampler:
-    def __init__(self, data_path):
+    def __init__(self, data_path, target_samples):
 
         self.data_path = data_path
+        self.target_samples = 5000  # target number of samples
+        self.stratify_column = "rating"  # column to stratify by (another sampleset will use keyword boosting to aid feature request / bug report numbers)
+
+        self.original_data = pd.read_csv(original_path, low_memory=False)
         self.data = pd.read_csv(self.data_path, low_memory=False)
         self.total = len(self.data)  # total number of records in the dataset
-        self.target_samples = 5000  # target number of samples
-        self.stratify_column = "rating"  # column to stratify by
 
+        print("="*50)
+        print("SAMPLER INITIALIZED")
+        print("="*50,"\n")
+
+
+        print(f"Total records in dataset: {self.total}")
         print(f"Data loaded from {self.data_path}, total records: {len(self.data)}")
-        print(self.data.head())
+        #print(self.data.head())
+        #print(f"\nCurrent distribution:")
+        #print(self.data[self.stratify_column].value_counts().sort_index())
+        #print(f"\nColumns: {self.data.columns.tolist()}")
+        print(f"Percentage distribution (working data):")
+        print((self.data[self.stratify_column].value_counts(normalize=True).sort_index() * 100).round(1),"\n")
+        _origdist = self.original_data[self.stratify_column].value_counts(normalize=True).sort_index()
+        print(f"Original Distribution from {original_path}:")
+        print((_origdist*100).round(1),"\n")
 
         self.data.info()
 
@@ -31,36 +54,128 @@ class Sampler:
     2     3.9% (41707)
     Name: proportion, dtype: object
     """
-
-    def get_stratified_sample(self):
-        stratified_sample = self.data.groupby(self.stratify_column).apply(
-            lambda x: x.sample(n=int(len(x) / self.total * self.target_samples)),
-            # include_groups=False
-    )
-        return stratified_sample
-sampler = Sampler("data/uber_reviews.csv")
-
-
-
-to_sample = input("Do you want to create a stratified sample of the data? (y/n): ")             
-
-if to_sample == 'y':
-    sampled = sampler.get_stratified_sample()
-    sampled.to_csv("data/uber_reviews_sampled.csv", index=False)
-    print("Original columns:", sampler.data.columns.tolist())
-    print("Sampled columns:", sampled.columns.tolist())
-    print("Stratified sample saved to data/uber_reviews_sampled.csv")
-elif to_sample == 'n':
-    sampled_data = pd.read_csv("data/uber_reviews_sampled.csv", low_memory=False)
-    """
-    debug to check sampled data matches original columns
-    print("Original columns:", sampler.data.columns.tolist())
-    print("Sampled columns:", sampled_data.columns.tolist())
     """
     
-    print("Original data distribution:")
-    print(sampler.data["rating"].value_counts())
-    print("Sampled data distribution:")
-    print(sampled_data["rating"].value_counts())
-else:
-    print("Invalid input, please enter 'y' or 'n'")
+    get_stratified_sample(): sample size per rating follows the working data's rating shares.
+    Redundant calculation, kept for clarity.
+    Does not account for the distribution having changed greatly after preprocessing.
+
+    """
+    def get_stratified_sample(self) -> pd.DataFrame:  # one DataFrame of sampled rows; each rating keeps its share of self.data
+        parts = [self.x()(group) for _, group in self.data.groupby(self.stratify_column)]  # self.x() builds the sampler; passing self.x itself to apply() raised TypeError
+        return pd.concat(parts) if parts else self.data.iloc[0:0]  # empty frame (same columns) when there is nothing to sample
+    
+
+    # x(self): helper for get_proportional_sample and get_stratified_sample; returns a callable that samples int(len(group) / self.total * self.target_samples) rows from a group
+    def x(self):  # TODO: give this helper a descriptive name (flagged =FIX=)
+        return lambda group: group.sample(n=int(len(group) / self.total * self.target_samples), random_state=33)  # fixed seed, matching original_distribution_sample
+    """
+    get_proportional_sample()
+
+    """
+    
+    def original_distribution_sample(self):
+        """Draw a sample whose rating mix mirrors the original Uber dataset.
+
+        The main sampling method for our labelling: keeping the composition
+        of the original data is a fairer comparison and may also work better
+        in general.
+
+        inputs: none beyond self (reads self.data, self.stratify_column and
+            self.target_samples).
+        outputs: DataFrame of the sampled rows with a fresh 0..n-1 index; a
+            rating with too few rows contributes every row it has.
+        """
+        original_shares = {  # rating -> fraction of the original dataset
+            5: 0.571,
+            1: 0.265,
+            4: 0.078,
+            3: 0.047,
+            2: 0.039,
+        }
+        # round(), not int(): float error can leave a product just under a whole number.
+        original_dist = {r: round(s * self.target_samples) for r, s in original_shares.items()}
+        print("Target Distribution =", original_dist)
+        samples = []
+        for rating, num_samples in original_dist.items():
+            rating_data = self.data[self.data[self.stratify_column] == rating]
+            if len(rating_data) < num_samples:
+                print(f"Only {len(rating_data)} of {num_samples} samples available for rating {rating}")
+                num_samples = len(rating_data)
+            samples.append(rating_data.sample(n=num_samples, random_state=33))
+        return pd.concat(samples, ignore_index=True)
+    
+    def sample_with_keywords(self):
+        """Planned keyword-boosted sample (stub: prints a banner, returns None).
+
+        Intended to give future training more bug and feature data:
+        - 2000 balanced by rating (400 per)
+        - 1500 likely bugs using bug_keywords list
+        - 1500 likely features using feature_keywords list
+
+        inputs: none beyond self.
+        outputs: None for now; no sample is built yet.
+        """
+        #TODO add keywords for feature classification
+        # Single quotes inside the f-strings: reusing the outer double quote
+        # is a SyntaxError before Python 3.12.
+        print(f"\n{'='*50}")
+        print("Keyword influenced / rating stratified set")
+        print(f"\n{'='*50}")
+
+        # Not used yet; kept for the planned bug-report filter.
+        bug_keywords = ["crash","crashes", "freeze", "freezes", "error",
+                        "stops", "doesnt work", "doesn't work","loading",
+                        "blank", "stuck", "load", "loads", "broken", "breaks",
+                        "glitch", "glitches", "issue", "could you", "fix",
+                        "failed"]
+
+        # NOTE(review): "could you" reads like a feature-request cue - confirm it belongs in the bug list.
+
+        return
+    
+    def save_sample(self, sample_df, output_path):
+        """Write sample_df to output_path as CSV (no index), then print per-rating counts."""
+        sample_df.to_csv(output_path, index=False)
+        print(f"\n{'='*50}")
+        print("SAMPLE SAVED")
+        print(f"{'='*50}")
+        print(f"Location: {output_path}")
+        print(f"Total samples: {len(sample_df):,}")
+        print(f"\nDistribution:")
+        ratings = sample_df[self.stratify_column]  # looked up once, after the header lines as before
+        for rating in sorted(ratings.unique()):
+            count = (ratings == rating).sum()
+            pct = count / len(sample_df) * 100
+            print(f"  {rating}★: {count:,} ({pct:.1f}%)")
+
+def main():
+    """Prompt for a sampling strategy, draw that sample and save it to sampled_path."""
+    sampler = Sampler(path, target_samples=5000)
+
+    # Choose sampling strategy
+    print(f"\n{'='*50}")
+    print("SAMPLING STRATEGY OPTIONS")
+    print(f"{'='*50}")
+    print("1. get_stratified_sample() stratified by current distribution")
+    print("2. original_distribution_sample() stratified by the original data distribution")
+    print("3. sample_with_keywords() stratified using original distribution but also using a keyword dictionary")
+
+    choice = input("\nEnter choice (1-3): ").strip()
+    if choice == '1':
+        sample = sampler.get_stratified_sample()
+    elif choice == '2':
+        sample = sampler.original_distribution_sample()
+    elif choice == '3':
+        # Sampler has no get_keyword_boosted_sample(); the keyword method is
+        # sample_with_keywords(), which does not build a sample yet.
+        print("Keyword-boosted sampling is not implemented yet; nothing was saved")
+        return
+    else:
+        print("Invalid input, please enter 1, 2 or 3")
+        return
+    sampler.save_sample(sample, sampled_path)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

	Time_submitted	Review	Rating	Total_thumbsup	Reply
0	2022-07-09 15:00:00	Great music service, the audio is high quality...	5	2	NaN
1	2022-07-09 14:21:22	Please ignore previous negative rating. This a...	5	1	NaN
2	2022-07-09 13:27:32	This pop-up \"Get the best Spotify experience o...	4	0	NaN
3	2022-07-09 13:26:45	Really buggy and terrible to use as of recently	1	1	NaN
4	2022-07-09 13:20:49	Dear Spotify why do I get songs that I didn't ...	1	1	NaN
	Time_submitted	Review	Rating	Total_thumbsup	Reply
61589	2022-01-01 03:01:29	Even though it was communicated that lyrics fe...	1	6	NaN
61590	2022-01-01 02:13:40	Use to be sooo good back when I had it, and wh...	1	0	NaN
61591	2022-01-01 01:02:29	This app would be good if not for it taking ov...	2	10	NaN
61592	2022-01-01 00:49:23	The app is good hard to navigate and won't jus...	2	1	NaN
61593	2022-01-01 00:19:09	Its good but sometimes it doesnt load the musi...	4	0	NaN
	id	NAME	host id	host_identity_verified	host name	neighbourhood group	neighbourhood	lat	long	country	...	service fee	minimum nights	number of reviews	last review	reviews per month	review rate number	calculated host listings count	availability 365	house_rules	license
0	1001254	Clean & quiet apt home by the park	80014485718	unconfirmed	Madaline	Brooklyn	Kensington	40.64749	-73.97237	United States	...	$193	10.0	9.0	10/19/2021	0.21	4.0	6.0	286.0	Clean up and treat the home the way you'd like...	NaN
1	1002102	Skylit Midtown Castle	52335172823	verified	Jenna	Manhattan	Midtown	40.75362	-73.98377	United States	...	$28	30.0	45.0	5/21/2022	0.38	4.0	2.0	228.0	Pet friendly but please confirm with me if the...	NaN
2	1002403	THE VILLAGE OF HARLEM....NEW YORK !	78829239556	NaN	Elise	Manhattan	Harlem	40.80902	-73.94190	United States	...	$124	3.0	0.0	NaN	NaN	5.0	1.0	352.0	I encourage you to use my kitchen, cooking and...	NaN
3	1002755	NaN	85098326012	unconfirmed	Garry	Brooklyn	Clinton Hill	40.68514	-73.95976	United States	...	$74	30.0	270.0	7/5/2019	4.64	4.0	1.0	322.0	NaN	NaN
4	1003689	Entire Apt: Spacious Studio/Loft by central park	92037596077	verified	Lyndon	Manhattan	East Harlem	40.79851	-73.94399	United States	...	$41	10.0	9.0	11/19/2018	0.10	3.0	1.0	289.0	Please no smoking in the house, porch or on th...	NaN
	Unnamed: 0	review_id	pseudo_author_id	author_name	review_text	review_rating	author_app_version	review_timestamp
0	0	6caba53d-789d-4733-bad5-c7491daf80f2	152618553977019693742	A Google user	Nice app need to add red light cam.	5	0.99.2.3	2009-06-30 16:48:15
1	1	30c15838-8b02-4dae-8f51-25905cb40b68	234382942865437071667	A Google user	Really cool social app. Lots of potential to b...	5	0.99.2.3	2009-06-30 16:58:43
2	2	c090400e-f88f-4129-930d-a650f3163a11	174473604608358796368	A Google user	I was all excited about this app (ehat a great...	1	0.99.2.3	2009-06-30 17:08:33
3	3	f6f37456-793b-4786-af6e-454a811361bf	286593453219054880269	A Google user	I love this app! Lol	5	0.99.2.3	2009-06-30 17:37:22
4	4	8ae5d962-7c0c-476d-82fa-79f6e5484acc	167276875678680630145	A Google user	Great app i like the idea of your car being pa...	4	0.99.2.3	2009-06-30 23:58:43
	Unnamed: 0	review_id	pseudo_author_id	author_name	review_text	review_rating	author_app_version	review_timestamp
780068	780068	01655504-5a51-4c19-b313-2bd5fa3f253a	680743620884748258838	Ma********ll	Freezes	3	NaN	2023-11-17 03:18:26
780069	780069	f04306cb-af60-4a44-aebc-c37122620319	266638684561117704682	Zu******el	To stuck	1	NaN	2023-11-17 03:18:38
780070	780070	894e3c41-ca20-4781-9308-70eeb060a865	154572309081670894420	br**********ji	racist made app	1	4.99.0.2	2023-11-17 03:23:20
780071	780071	4fafb0b1-485e-473e-9bcd-d5c9848424d2	154995071911163107981	Mo***********da	بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...	5	4.99.1.1	2023-11-17 04:05:02
780072	780072	d1570ba0-ffc5-4fc6-8d34-12daba4b38e2	200574835524973617311	Re***********iz	Best app ever used.	5	4.99.0.2	2023-11-17 04:06:44
	source	review_id	user_name	review_title	review_description	rating	review_date	developer_response	developer_response_date	appVersion	laguage_code	country_code
0	Google Play	18d6584c-d0e9-4833-a744-f607058aee97	Milky Way	NaN	Suddenly, the driver can't have my location an...	1	2023-08-10 17:48:51	NaN	NaN	NaN	en	in
1	Google Play	50a08f18-cece-4ddf-b617-028844c8aa28	Bradlee Severa	NaN	Very cordial.. And helped with a quick turnaro...	5	2023-08-10 17:38:35	NaN	NaN	4.485.10000	en	in
2	Google Play	b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7	Amit Aggarwal	NaN	Very good experience	5	2023-08-10 17:38:17	NaN	NaN	4.486.10002	en	in
3	Google Play	502702a9-25ed-4373-a96c-7fa1f06caacd	Bryant Inman	NaN	All I use	5	2023-08-10 17:37:45	NaN	NaN	4.467.10008	en	in
4	Google Play	f47a3fb6-23db-49bd-9e63-f33c8d724d07	Addie Whittaker	NaN	I have enjoyed traveling by Uber my drivers ha...	5	2023-08-10 17:36:56	NaN	NaN	4.486.10002	en	in
	source	review_id	user_name	review_title	review_description	rating	review_date	developer_response	developer_response_date	appVersion	laguage_code	country_code
0	Google Play	fbc7ffc9-5a89-446e-87fd-d69bf4a7f984	Puipuii Ralte	NaN	The map in Ola is so messed up, i have to pay ...	1	2023-08-10 16:40:50	NaN	NaN	6.3.2	en	in
1	Google Play	5a0051fb-220a-45b2-ba94-a15a2949218f	Deepak Kumar	NaN	Deepak Kumar.... 🙏🙏🙏🙏🙏]	5	2023-08-10 16:36:14	NaN	NaN	NaN	en	in
2	Google Play	71ebf933-b734-474d-bb65-a18c90906ed2	Ahamed Azarudeen	NaN	Such aa irresponsible app more then I waiting ...	1	2023-08-10 16:29:31	NaN	NaN	6.3.1	en	in
3	Google Play	e1cc0010-60b3-4126-99c2-e8549088566a	Rahil Syed	NaN	Worst	1	2023-08-10 15:52:06	NaN	NaN	5.0.4	en	in
4	Google Play	77cf1be1-b428-4493-ae25-e0f288f79b8f	vin 007	NaN	Too much expensive .. try UBer... They are pro...	1	2023-08-10 15:51:10	NaN	NaN	NaN	en	in