diff --git a/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb b/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb
new file mode 100644
index 0000000..910a184
--- /dev/null
+++ b/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb
@@ -0,0 +1,1471 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Spotify reviews; the relative path resolves against the kernel's working directory\n",
+ "csv0 = pd.read_csv(filepath_or_buffer=\"spotify.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c0631560-c1be-4bbf-b050-b6a552e74d63",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Time_submitted | \n",
+ " Review | \n",
+ " Rating | \n",
+ " Total_thumbsup | \n",
+ " Reply | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2022-07-09 15:00:00 | \n",
+ " Great music service, the audio is high quality... | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2022-07-09 14:21:22 | \n",
+ " Please ignore previous negative rating. This a... | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2022-07-09 13:27:32 | \n",
+ " This pop-up \"Get the best Spotify experience o... | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2022-07-09 13:26:45 | \n",
+ " Really buggy and terrible to use as of recently | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2022-07-09 13:20:49 | \n",
+ " Dear Spotify why do I get songs that I didn't ... | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Time_submitted Review \\\n",
+ "0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n",
+ "1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n",
+ "2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n",
+ "3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n",
+ "4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n",
+ "\n",
+ " Rating Total_thumbsup Reply \n",
+ "0 5 2 NaN \n",
+ "1 5 1 NaN \n",
+ "2 4 0 NaN \n",
+ "3 1 1 NaN \n",
+ "4 1 1 NaN "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First five rows of the Spotify frame, to check the parsed columns\n",
+ "csv0.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Time_submitted | \n",
+ " Review | \n",
+ " Rating | \n",
+ " Total_thumbsup | \n",
+ " Reply | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 61589 | \n",
+ " 2022-01-01 03:01:29 | \n",
+ " Even though it was communicated that lyrics fe... | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 61590 | \n",
+ " 2022-01-01 02:13:40 | \n",
+ " Use to be sooo good back when I had it, and wh... | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 61591 | \n",
+ " 2022-01-01 01:02:29 | \n",
+ " This app would be good if not for it taking ov... | \n",
+ " 2 | \n",
+ " 10 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 61592 | \n",
+ " 2022-01-01 00:49:23 | \n",
+ " The app is good hard to navigate and won't jus... | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 61593 | \n",
+ " 2022-01-01 00:19:09 | \n",
+ " Its good but sometimes it doesnt load the musi... | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Time_submitted Review \\\n",
+ "61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n",
+ "61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n",
+ "61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n",
+ "61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n",
+ "61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n",
+ "\n",
+ " Rating Total_thumbsup Reply \n",
+ "61589 1 6 NaN \n",
+ "61590 1 0 NaN \n",
+ "61591 2 10 NaN \n",
+ "61592 2 1 NaN \n",
+ "61593 4 0 NaN "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Last five rows of the Spotify frame\n",
+ "csv0.tail(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Airbnb open-data table; low_memory=False parses the file in one pass instead of in\n",
+ "# chunks, so pandas does not infer mixed types within a column\n",
+ "csv1 = pd.read_csv(filepath_or_buffer=\"Airbnb_Open_Data.csv\", low_memory=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "1efba903-5004-4d7b-a1ee-42f333111055",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " NAME | \n",
+ " host id | \n",
+ " host_identity_verified | \n",
+ " host name | \n",
+ " neighbourhood group | \n",
+ " neighbourhood | \n",
+ " lat | \n",
+ " long | \n",
+ " country | \n",
+ " ... | \n",
+ " service fee | \n",
+ " minimum nights | \n",
+ " number of reviews | \n",
+ " last review | \n",
+ " reviews per month | \n",
+ " review rate number | \n",
+ " calculated host listings count | \n",
+ " availability 365 | \n",
+ " house_rules | \n",
+ " license | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1001254 | \n",
+ " Clean & quiet apt home by the park | \n",
+ " 80014485718 | \n",
+ " unconfirmed | \n",
+ " Madaline | \n",
+ " Brooklyn | \n",
+ " Kensington | \n",
+ " 40.64749 | \n",
+ " -73.97237 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $193 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ " 10/19/2021 | \n",
+ " 0.21 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 286.0 | \n",
+ " Clean up and treat the home the way you'd like... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1002102 | \n",
+ " Skylit Midtown Castle | \n",
+ " 52335172823 | \n",
+ " verified | \n",
+ " Jenna | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $28 | \n",
+ " 30.0 | \n",
+ " 45.0 | \n",
+ " 5/21/2022 | \n",
+ " 0.38 | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ " 228.0 | \n",
+ " Pet friendly but please confirm with me if the... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1002403 | \n",
+ " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
+ " 78829239556 | \n",
+ " NaN | \n",
+ " Elise | \n",
+ " Manhattan | \n",
+ " Harlem | \n",
+ " 40.80902 | \n",
+ " -73.94190 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $124 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " 352.0 | \n",
+ " I encourage you to use my kitchen, cooking and... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1002755 | \n",
+ " NaN | \n",
+ " 85098326012 | \n",
+ " unconfirmed | \n",
+ " Garry | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $74 | \n",
+ " 30.0 | \n",
+ " 270.0 | \n",
+ " 7/5/2019 | \n",
+ " 4.64 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " 322.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1003689 | \n",
+ " Entire Apt: Spacious Studio/Loft by central park | \n",
+ " 92037596077 | \n",
+ " verified | \n",
+ " Lyndon | \n",
+ " Manhattan | \n",
+ " East Harlem | \n",
+ " 40.79851 | \n",
+ " -73.94399 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $41 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ " 11/19/2018 | \n",
+ " 0.10 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 289.0 | \n",
+ " Please no smoking in the house, porch or on th... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id NAME host id \\\n",
+ "0 1001254 Clean & quiet apt home by the park 80014485718 \n",
+ "1 1002102 Skylit Midtown Castle 52335172823 \n",
+ "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
+ "3 1002755 NaN 85098326012 \n",
+ "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
+ "\n",
+ " host_identity_verified host name neighbourhood group neighbourhood \\\n",
+ "0 unconfirmed Madaline Brooklyn Kensington \n",
+ "1 verified Jenna Manhattan Midtown \n",
+ "2 NaN Elise Manhattan Harlem \n",
+ "3 unconfirmed Garry Brooklyn Clinton Hill \n",
+ "4 verified Lyndon Manhattan East Harlem \n",
+ "\n",
+ " lat long country ... service fee minimum nights \\\n",
+ "0 40.64749 -73.97237 United States ... $193 10.0 \n",
+ "1 40.75362 -73.98377 United States ... $28 30.0 \n",
+ "2 40.80902 -73.94190 United States ... $124 3.0 \n",
+ "3 40.68514 -73.95976 United States ... $74 30.0 \n",
+ "4 40.79851 -73.94399 United States ... $41 10.0 \n",
+ "\n",
+ " number of reviews last review reviews per month review rate number \\\n",
+ "0 9.0 10/19/2021 0.21 4.0 \n",
+ "1 45.0 5/21/2022 0.38 4.0 \n",
+ "2 0.0 NaN NaN 5.0 \n",
+ "3 270.0 7/5/2019 4.64 4.0 \n",
+ "4 9.0 11/19/2018 0.10 3.0 \n",
+ "\n",
+ " calculated host listings count availability 365 \\\n",
+ "0 6.0 286.0 \n",
+ "1 2.0 228.0 \n",
+ "2 1.0 352.0 \n",
+ "3 1.0 322.0 \n",
+ "4 1.0 289.0 \n",
+ "\n",
+ " house_rules license \n",
+ "0 Clean up and treat the home the way you'd like... NaN \n",
+ "1 Pet friendly but please confirm with me if the... NaN \n",
+ "2 I encourage you to use my kitchen, cooking and... NaN \n",
+ "3 NaN NaN \n",
+ "4 Please no smoking in the house, porch or on th... NaN \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First five rows of the Airbnb frame\n",
+ "csv1.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "39d543be-013a-4976-942d-f9884274c7be",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " NAME | \n",
+ " host id | \n",
+ " host_identity_verified | \n",
+ " host name | \n",
+ " neighbourhood group | \n",
+ " neighbourhood | \n",
+ " lat | \n",
+ " long | \n",
+ " country | \n",
+ " ... | \n",
+ " service fee | \n",
+ " minimum nights | \n",
+ " number of reviews | \n",
+ " last review | \n",
+ " reviews per month | \n",
+ " review rate number | \n",
+ " calculated host listings count | \n",
+ " availability 365 | \n",
+ " house_rules | \n",
+ " license | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1001254 | \n",
+ " Clean & quiet apt home by the park | \n",
+ " 80014485718 | \n",
+ " unconfirmed | \n",
+ " Madaline | \n",
+ " Brooklyn | \n",
+ " Kensington | \n",
+ " 40.64749 | \n",
+ " -73.97237 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $193 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ " 10/19/2021 | \n",
+ " 0.21 | \n",
+ " 4.0 | \n",
+ " 6.0 | \n",
+ " 286.0 | \n",
+ " Clean up and treat the home the way you'd like... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1002102 | \n",
+ " Skylit Midtown Castle | \n",
+ " 52335172823 | \n",
+ " verified | \n",
+ " Jenna | \n",
+ " Manhattan | \n",
+ " Midtown | \n",
+ " 40.75362 | \n",
+ " -73.98377 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $28 | \n",
+ " 30.0 | \n",
+ " 45.0 | \n",
+ " 5/21/2022 | \n",
+ " 0.38 | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ " 228.0 | \n",
+ " Pet friendly but please confirm with me if the... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1002403 | \n",
+ " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
+ " 78829239556 | \n",
+ " NaN | \n",
+ " Elise | \n",
+ " Manhattan | \n",
+ " Harlem | \n",
+ " 40.80902 | \n",
+ " -73.94190 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $124 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 5.0 | \n",
+ " 1.0 | \n",
+ " 352.0 | \n",
+ " I encourage you to use my kitchen, cooking and... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1002755 | \n",
+ " NaN | \n",
+ " 85098326012 | \n",
+ " unconfirmed | \n",
+ " Garry | \n",
+ " Brooklyn | \n",
+ " Clinton Hill | \n",
+ " 40.68514 | \n",
+ " -73.95976 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $74 | \n",
+ " 30.0 | \n",
+ " 270.0 | \n",
+ " 7/5/2019 | \n",
+ " 4.64 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " 322.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1003689 | \n",
+ " Entire Apt: Spacious Studio/Loft by central park | \n",
+ " 92037596077 | \n",
+ " verified | \n",
+ " Lyndon | \n",
+ " Manhattan | \n",
+ " East Harlem | \n",
+ " 40.79851 | \n",
+ " -73.94399 | \n",
+ " United States | \n",
+ " ... | \n",
+ " $41 | \n",
+ " 10.0 | \n",
+ " 9.0 | \n",
+ " 11/19/2018 | \n",
+ " 0.10 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 289.0 | \n",
+ " Please no smoking in the house, porch or on th... | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id NAME host id \\\n",
+ "0 1001254 Clean & quiet apt home by the park 80014485718 \n",
+ "1 1002102 Skylit Midtown Castle 52335172823 \n",
+ "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
+ "3 1002755 NaN 85098326012 \n",
+ "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
+ "\n",
+ " host_identity_verified host name neighbourhood group neighbourhood \\\n",
+ "0 unconfirmed Madaline Brooklyn Kensington \n",
+ "1 verified Jenna Manhattan Midtown \n",
+ "2 NaN Elise Manhattan Harlem \n",
+ "3 unconfirmed Garry Brooklyn Clinton Hill \n",
+ "4 verified Lyndon Manhattan East Harlem \n",
+ "\n",
+ " lat long country ... service fee minimum nights \\\n",
+ "0 40.64749 -73.97237 United States ... $193 10.0 \n",
+ "1 40.75362 -73.98377 United States ... $28 30.0 \n",
+ "2 40.80902 -73.94190 United States ... $124 3.0 \n",
+ "3 40.68514 -73.95976 United States ... $74 30.0 \n",
+ "4 40.79851 -73.94399 United States ... $41 10.0 \n",
+ "\n",
+ " number of reviews last review reviews per month review rate number \\\n",
+ "0 9.0 10/19/2021 0.21 4.0 \n",
+ "1 45.0 5/21/2022 0.38 4.0 \n",
+ "2 0.0 NaN NaN 5.0 \n",
+ "3 270.0 7/5/2019 4.64 4.0 \n",
+ "4 9.0 11/19/2018 0.10 3.0 \n",
+ "\n",
+ " calculated host listings count availability 365 \\\n",
+ "0 6.0 286.0 \n",
+ "1 2.0 228.0 \n",
+ "2 1.0 352.0 \n",
+ "3 1.0 322.0 \n",
+ "4 1.0 289.0 \n",
+ "\n",
+ " house_rules license \n",
+ "0 Clean up and treat the home the way you'd like... NaN \n",
+ "1 Pet friendly but please confirm with me if the... NaN \n",
+ "2 I encourage you to use my kitchen, cooking and... NaN \n",
+ "3 NaN NaN \n",
+ "4 Please no smoking in the house, porch or on th... NaN \n",
+ "\n",
+ "[5 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First five rows of the Airbnb frame\n",
+ "# NOTE(review): repeats the previous cell's preview, and this cell's saved execution_count (7)\n",
+ "# is lower than the read_csv cell's (8) - looks like a stale leftover; confirm and remove\n",
+ "csv1.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "95f93b29-94be-4c93-9793-cf51c2ba2442",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Waze reviews; the relative path resolves against the kernel's working directory\n",
+ "csv02 = pd.read_csv(filepath_or_buffer=\"WAZE_REVIEWS.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " review_id | \n",
+ " pseudo_author_id | \n",
+ " author_name | \n",
+ " review_text | \n",
+ " review_rating | \n",
+ " review_likes | \n",
+ " author_app_version | \n",
+ " review_timestamp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 6caba53d-789d-4733-bad5-c7491daf80f2 | \n",
+ " 152618553977019693742 | \n",
+ " A Google user | \n",
+ " Nice app need to add red light cam. | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0.99.2.3 | \n",
+ " 2009-06-30 16:48:15 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 30c15838-8b02-4dae-8f51-25905cb40b68 | \n",
+ " 234382942865437071667 | \n",
+ " A Google user | \n",
+ " Really cool social app. Lots of potential to b... | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0.99.2.3 | \n",
+ " 2009-06-30 16:58:43 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " c090400e-f88f-4129-930d-a650f3163a11 | \n",
+ " 174473604608358796368 | \n",
+ " A Google user | \n",
+ " I was all excited about this app (ehat a great... | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0.99.2.3 | \n",
+ " 2009-06-30 17:08:33 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " f6f37456-793b-4786-af6e-454a811361bf | \n",
+ " 286593453219054880269 | \n",
+ " A Google user | \n",
+ " I love this app! Lol | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 0.99.2.3 | \n",
+ " 2009-06-30 17:37:22 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " 8ae5d962-7c0c-476d-82fa-79f6e5484acc | \n",
+ " 167276875678680630145 | \n",
+ " A Google user | \n",
+ " Great app i like the idea of your car being pa... | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0.99.2.3 | \n",
+ " 2009-06-30 23:58:43 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 review_id pseudo_author_id \\\n",
+ "0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n",
+ "1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n",
+ "2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n",
+ "3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n",
+ "4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n",
+ "\n",
+ " author_name review_text \\\n",
+ "0 A Google user Nice app need to add red light cam. \n",
+ "1 A Google user Really cool social app. Lots of potential to b... \n",
+ "2 A Google user I was all excited about this app (ehat a great... \n",
+ "3 A Google user I love this app! Lol \n",
+ "4 A Google user Great app i like the idea of your car being pa... \n",
+ "\n",
+ " review_rating review_likes author_app_version review_timestamp \n",
+ "0 5 0 0.99.2.3 2009-06-30 16:48:15 \n",
+ "1 5 0 0.99.2.3 2009-06-30 16:58:43 \n",
+ "2 1 0 0.99.2.3 2009-06-30 17:08:33 \n",
+ "3 5 0 0.99.2.3 2009-06-30 17:37:22 \n",
+ "4 4 0 0.99.2.3 2009-06-30 23:58:43 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# First five rows of the Waze frame\n",
+ "csv02.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " review_id | \n",
+ " pseudo_author_id | \n",
+ " author_name | \n",
+ " review_text | \n",
+ " review_rating | \n",
+ " review_likes | \n",
+ " author_app_version | \n",
+ " review_timestamp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 780068 | \n",
+ " 780068 | \n",
+ " 01655504-5a51-4c19-b313-2bd5fa3f253a | \n",
+ " 680743620884748258838 | \n",
+ " Ma********ll | \n",
+ " Freezes | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2023-11-17 03:18:26 | \n",
+ "
\n",
+ " \n",
+ " | 780069 | \n",
+ " 780069 | \n",
+ " f04306cb-af60-4a44-aebc-c37122620319 | \n",
+ " 266638684561117704682 | \n",
+ " Zu******el | \n",
+ " To stuck | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2023-11-17 03:18:38 | \n",
+ "
\n",
+ " \n",
+ " | 780070 | \n",
+ " 780070 | \n",
+ " 894e3c41-ca20-4781-9308-70eeb060a865 | \n",
+ " 154572309081670894420 | \n",
+ " br**********ji | \n",
+ " racist made app | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 4.99.0.2 | \n",
+ " 2023-11-17 03:23:20 | \n",
+ "
\n",
+ " \n",
+ " | 780071 | \n",
+ " 780071 | \n",
+ " 4fafb0b1-485e-473e-9bcd-d5c9848424d2 | \n",
+ " 154995071911163107981 | \n",
+ " Mo***********da | \n",
+ " بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 4.99.1.1 | \n",
+ " 2023-11-17 04:05:02 | \n",
+ "
\n",
+ " \n",
+ " | 780072 | \n",
+ " 780072 | \n",
+ " d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 | \n",
+ " 200574835524973617311 | \n",
+ " Re***********iz | \n",
+ " Best app ever used. | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 4.99.0.2 | \n",
+ " 2023-11-17 04:06:44 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 review_id \\\n",
+ "780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n",
+ "780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n",
+ "780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n",
+ "780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n",
+ "780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n",
+ "\n",
+ " pseudo_author_id author_name \\\n",
+ "780068 680743620884748258838 Ma********ll \n",
+ "780069 266638684561117704682 Zu******el \n",
+ "780070 154572309081670894420 br**********ji \n",
+ "780071 154995071911163107981 Mo***********da \n",
+ "780072 200574835524973617311 Re***********iz \n",
+ "\n",
+ " review_text review_rating \\\n",
+ "780068 Freezes 3 \n",
+ "780069 To stuck 1 \n",
+ "780070 racist made app 1 \n",
+ "780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n",
+ "780072 Best app ever used. 5 \n",
+ "\n",
+ " review_likes author_app_version review_timestamp \n",
+ "780068 0 NaN 2023-11-17 03:18:26 \n",
+ "780069 0 NaN 2023-11-17 03:18:38 \n",
+ "780070 0 4.99.0.2 2023-11-17 03:23:20 \n",
+ "780071 0 4.99.1.1 2023-11-17 04:05:02 \n",
+ "780072 0 4.99.0.2 2023-11-17 04:06:44 "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Last five rows of the Waze frame\n",
+ "csv02.tail(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " review_id | \n",
+ " user_name | \n",
+ " review_title | \n",
+ " review_description | \n",
+ " rating | \n",
+ " thumbs_up | \n",
+ " review_date | \n",
+ " developer_response | \n",
+ " developer_response_date | \n",
+ " appVersion | \n",
+ " laguage_code | \n",
+ " country_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Google Play | \n",
+ " 18d6584c-d0e9-4833-a744-f607058aee97 | \n",
+ " Milky Way | \n",
+ " NaN | \n",
+ " Suddenly, the driver can't have my location an... | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 17:48:51 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Google Play | \n",
+ " 50a08f18-cece-4ddf-b617-028844c8aa28 | \n",
+ " Bradlee Severa | \n",
+ " NaN | \n",
+ " Very cordial.. And helped with a quick turnaro... | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 17:38:35 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 4.485.10000 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Google Play | \n",
+ " b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 | \n",
+ " Amit Aggarwal | \n",
+ " NaN | \n",
+ " Very good experience | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 17:38:17 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 4.486.10002 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Google Play | \n",
+ " 502702a9-25ed-4373-a96c-7fa1f06caacd | \n",
+ " Bryant Inman | \n",
+ " NaN | \n",
+ " All I use | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 17:37:45 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 4.467.10008 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Google Play | \n",
+ " f47a3fb6-23db-49bd-9e63-f33c8d724d07 | \n",
+ " Addie Whittaker | \n",
+ " NaN | \n",
+ " I have enjoyed traveling by Uber my drivers ha... | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 17:36:56 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 4.486.10002 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source review_id user_name \\\n",
+ "0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n",
+ "1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n",
+ "2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n",
+ "3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n",
+ "4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n",
+ "\n",
+ " review_title review_description rating \\\n",
+ "0 NaN Suddenly, the driver can't have my location an... 1 \n",
+ "1 NaN Very cordial.. And helped with a quick turnaro... 5 \n",
+ "2 NaN Very good experience 5 \n",
+ "3 NaN All I use 5 \n",
+ "4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n",
+ "\n",
+ " thumbs_up review_date developer_response developer_response_date \\\n",
+ "0 0.0 2023-08-10 17:48:51 NaN NaN \n",
+ "1 0.0 2023-08-10 17:38:35 NaN NaN \n",
+ "2 0.0 2023-08-10 17:38:17 NaN NaN \n",
+ "3 0.0 2023-08-10 17:37:45 NaN NaN \n",
+ "4 0.0 2023-08-10 17:36:56 NaN NaN \n",
+ "\n",
+ " appVersion laguage_code country_code \n",
+ "0 NaN en in \n",
+ "1 4.485.10000 en in \n",
+ "2 4.486.10002 en in \n",
+ "3 4.467.10008 en in \n",
+ "4 4.486.10002 en in "
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Uber reviews; low_memory=False parses the file in one pass instead of in chunks,\n",
+ "# so pandas does not infer mixed types within a column. Then preview five rows.\n",
+ "uber = pd.read_csv(filepath_or_buffer=\"Uber Customer Reviews.csv\", low_memory=False)\n",
+ "uber.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "d4ace5a2-346a-4099-9854-1cac2749a216",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(1069616, 13)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# (rows, columns) of the Uber frame\n",
+ "print(uber.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " source | \n",
+ " review_id | \n",
+ " user_name | \n",
+ " review_title | \n",
+ " review_description | \n",
+ " rating | \n",
+ " thumbs_up | \n",
+ " review_date | \n",
+ " developer_response | \n",
+ " developer_response_date | \n",
+ " appVersion | \n",
+ " laguage_code | \n",
+ " country_code | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Google Play | \n",
+ " fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 | \n",
+ " Puipuii Ralte | \n",
+ " NaN | \n",
+ " The map in Ola is so messed up, i have to pay ... | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 16:40:50 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6.3.2 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Google Play | \n",
+ " 5a0051fb-220a-45b2-ba94-a15a2949218f | \n",
+ " Deepak Kumar | \n",
+ " NaN | \n",
+ " Deepak Kumar.... 🙏🙏🙏🙏🙏] | \n",
+ " 5 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 16:36:14 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Google Play | \n",
+ " 71ebf933-b734-474d-bb65-a18c90906ed2 | \n",
+ " Ahamed Azarudeen | \n",
+ " NaN | \n",
+ " Such aa irresponsible app more then I waiting ... | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 16:29:31 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 6.3.1 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Google Play | \n",
+ " e1cc0010-60b3-4126-99c2-e8549088566a | \n",
+ " Rahil Syed | \n",
+ " NaN | \n",
+ " Worst | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 15:52:06 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 5.0.4 | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Google Play | \n",
+ " 77cf1be1-b428-4493-ae25-e0f288f79b8f | \n",
+ " vin 007 | \n",
+ " NaN | \n",
+ " Too much expensive .. try UBer... They are pro... | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " 2023-08-10 15:51:10 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " en | \n",
+ " in | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " source review_id user_name \\\n",
+ "0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n",
+ "1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n",
+ "2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n",
+ "3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n",
+ "4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n",
+ "\n",
+ " review_title review_description rating \\\n",
+ "0 NaN The map in Ola is so messed up, i have to pay ... 1 \n",
+ "1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n",
+ "2 NaN Such aa irresponsible app more then I waiting ... 1 \n",
+ "3 NaN Worst 1 \n",
+ "4 NaN Too much expensive .. try UBer... They are pro... 1 \n",
+ "\n",
+ " thumbs_up review_date developer_response developer_response_date \\\n",
+ "0 0.0 2023-08-10 16:40:50 NaN NaN \n",
+ "1 0.0 2023-08-10 16:36:14 NaN NaN \n",
+ "2 0.0 2023-08-10 16:29:31 NaN NaN \n",
+ "3 0.0 2023-08-10 15:52:06 NaN NaN \n",
+ "4 0.0 2023-08-10 15:51:10 NaN NaN \n",
+ "\n",
+ " appVersion laguage_code country_code \n",
+ "0 6.3.2 en in \n",
+ "1 NaN en in \n",
+ "2 6.3.1 en in \n",
+ "3 5.0.4 en in \n",
+ "4 NaN en in "
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Ola reviews; low_memory=False parses the file in one pass instead of in chunks,\n",
+ "# so pandas does not infer mixed types within a column. Then preview five rows.\n",
+ "ola_df = pd.read_csv(filepath_or_buffer=\"Ola Customer Reviews.csv\", low_memory=False)\n",
+ "ola_df.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "878a39c4-45d5-41d6-82b0-9c373c28e280",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "count 357678.000000\n",
+ "mean 92.402697\n",
+ "std 125.489169\n",
+ "min 1.000000\n",
+ "25% 8.000000\n",
+ "50% 33.000000\n",
+ "75% 131.000000\n",
+ "max 2877.000000\n",
+ "Name: review_length, dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Character count of every Ola review, then summary statistics of those counts.\n",
+ "# Missing review text yields NaN, which describe() leaves out of its count.\n",
+ "ola_df.loc[:, \"review_length\"] = ola_df.loc[:, \"review_description\"].str.len()\n",
+ "print(ola_df.loc[:, \"review_length\"].describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "1dd032ba-343b-4402-9d96-ee5e0432ab07",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substantive reviews: 204715\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Keep Ola reviews whose text is longer than MIN_REVIEW_CHARS characters; rows with a\n",
+ "# NaN length compare False and are dropped as well.\n",
+ "# The subset gets a dataset-specific name because the Uber filter cell further down\n",
+ "# assigns `substantive_reviews` and would otherwise overwrite this result.\n",
+ "MIN_REVIEW_CHARS = 20\n",
+ "ola_substantive_reviews = ola_df[ola_df['review_length'] > MIN_REVIEW_CHARS]\n",
+ "print(f\"Substantive reviews: {len(ola_substantive_reviews)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "count 1.069447e+06\n",
+ "mean 7.023987e+01\n",
+ "std 1.158196e+02\n",
+ "min 1.000000e+00\n",
+ "25% 8.000000e+00\n",
+ "50% 2.100000e+01\n",
+ "75% 7.800000e+01\n",
+ "max 3.792000e+03\n",
+ "Name: review_length, dtype: float64\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Character count of every Uber review, then summary statistics of those counts.\n",
+ "# Missing review text yields NaN, which describe() leaves out of its count.\n",
+ "uber.loc[:, \"review_length\"] = uber.loc[:, \"review_description\"].str.len()\n",
+ "print(uber.loc[:, \"review_length\"].describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "2dd05939-e87c-443d-9012-e5f45cf64ff5",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Substantive reviews: 542110\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Uber reviews whose text is longer than 20 characters; NaN lengths fail the comparison and drop out\n",
+ "substantive_reviews = uber.loc[uber[\"review_length\"].gt(20)]\n",
+ "print(f\"Substantive reviews: {len(substantive_reviews)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/datasets_reviews.ipynb b/datasets_reviews.ipynb
index 910a184..14f23a8 100644
--- a/datasets_reviews.ipynb
+++ b/datasets_reviews.ipynb
@@ -2,996 +2,40 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 10,
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
- "import pandas as pd"
+ "import pandas as pd\n",
+ "from pathlib import Path"
]
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a",
+ "execution_count": 11,
+ "id": "0c897ead-dfb5-4d18-bcfc-949824a0868f",
"metadata": {},
"outputs": [],
"source": [
- "csv0 = pd.read_csv(\"spotify.csv\")"
+ "uber = Path.home().joinpath('google-drive', 'Charlie_6013_RECLASS', 'Data', 'Raw', 'Uber Customer Reviews.csv')"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "c0631560-c1be-4bbf-b050-b6a552e74d63",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Time_submitted | \n",
- " Review | \n",
- " Rating | \n",
- " Total_thumbsup | \n",
- " Reply | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 2022-07-09 15:00:00 | \n",
- " Great music service, the audio is high quality... | \n",
- " 5 | \n",
- " 2 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 2022-07-09 14:21:22 | \n",
- " Please ignore previous negative rating. This a... | \n",
- " 5 | \n",
- " 1 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 2022-07-09 13:27:32 | \n",
- " This pop-up \"Get the best Spotify experience o... | \n",
- " 4 | \n",
- " 0 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 2022-07-09 13:26:45 | \n",
- " Really buggy and terrible to use as of recently | \n",
- " 1 | \n",
- " 1 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 2022-07-09 13:20:49 | \n",
- " Dear Spotify why do I get songs that I didn't ... | \n",
- " 1 | \n",
- " 1 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Time_submitted Review \\\n",
- "0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n",
- "1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n",
- "2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n",
- "3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n",
- "4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n",
- "\n",
- " Rating Total_thumbsup Reply \n",
- "0 5 2 NaN \n",
- "1 5 1 NaN \n",
- "2 4 0 NaN \n",
- "3 1 1 NaN \n",
- "4 1 1 NaN "
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv0.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Time_submitted | \n",
- " Review | \n",
- " Rating | \n",
- " Total_thumbsup | \n",
- " Reply | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 61589 | \n",
- " 2022-01-01 03:01:29 | \n",
- " Even though it was communicated that lyrics fe... | \n",
- " 1 | \n",
- " 6 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 61590 | \n",
- " 2022-01-01 02:13:40 | \n",
- " Use to be sooo good back when I had it, and wh... | \n",
- " 1 | \n",
- " 0 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 61591 | \n",
- " 2022-01-01 01:02:29 | \n",
- " This app would be good if not for it taking ov... | \n",
- " 2 | \n",
- " 10 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 61592 | \n",
- " 2022-01-01 00:49:23 | \n",
- " The app is good hard to navigate and won't jus... | \n",
- " 2 | \n",
- " 1 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 61593 | \n",
- " 2022-01-01 00:19:09 | \n",
- " Its good but sometimes it doesnt load the musi... | \n",
- " 4 | \n",
- " 0 | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Time_submitted Review \\\n",
- "61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n",
- "61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n",
- "61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n",
- "61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n",
- "61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n",
- "\n",
- " Rating Total_thumbsup Reply \n",
- "61589 1 6 NaN \n",
- "61590 1 0 NaN \n",
- "61591 2 10 NaN \n",
- "61592 2 1 NaN \n",
- "61593 4 0 NaN "
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv0.tail()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6",
+ "execution_count": 12,
+ "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
"metadata": {},
"outputs": [],
"source": [
- "csv1 = pd.read_csv(\"Airbnb_Open_Data.csv\", low_memory=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "1efba903-5004-4d7b-a1ee-42f333111055",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " NAME | \n",
- " host id | \n",
- " host_identity_verified | \n",
- " host name | \n",
- " neighbourhood group | \n",
- " neighbourhood | \n",
- " lat | \n",
- " long | \n",
- " country | \n",
- " ... | \n",
- " service fee | \n",
- " minimum nights | \n",
- " number of reviews | \n",
- " last review | \n",
- " reviews per month | \n",
- " review rate number | \n",
- " calculated host listings count | \n",
- " availability 365 | \n",
- " house_rules | \n",
- " license | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1001254 | \n",
- " Clean & quiet apt home by the park | \n",
- " 80014485718 | \n",
- " unconfirmed | \n",
- " Madaline | \n",
- " Brooklyn | \n",
- " Kensington | \n",
- " 40.64749 | \n",
- " -73.97237 | \n",
- " United States | \n",
- " ... | \n",
- " $193 | \n",
- " 10.0 | \n",
- " 9.0 | \n",
- " 10/19/2021 | \n",
- " 0.21 | \n",
- " 4.0 | \n",
- " 6.0 | \n",
- " 286.0 | \n",
- " Clean up and treat the home the way you'd like... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1002102 | \n",
- " Skylit Midtown Castle | \n",
- " 52335172823 | \n",
- " verified | \n",
- " Jenna | \n",
- " Manhattan | \n",
- " Midtown | \n",
- " 40.75362 | \n",
- " -73.98377 | \n",
- " United States | \n",
- " ... | \n",
- " $28 | \n",
- " 30.0 | \n",
- " 45.0 | \n",
- " 5/21/2022 | \n",
- " 0.38 | \n",
- " 4.0 | \n",
- " 2.0 | \n",
- " 228.0 | \n",
- " Pet friendly but please confirm with me if the... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1002403 | \n",
- " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
- " 78829239556 | \n",
- " NaN | \n",
- " Elise | \n",
- " Manhattan | \n",
- " Harlem | \n",
- " 40.80902 | \n",
- " -73.94190 | \n",
- " United States | \n",
- " ... | \n",
- " $124 | \n",
- " 3.0 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 5.0 | \n",
- " 1.0 | \n",
- " 352.0 | \n",
- " I encourage you to use my kitchen, cooking and... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1002755 | \n",
- " NaN | \n",
- " 85098326012 | \n",
- " unconfirmed | \n",
- " Garry | \n",
- " Brooklyn | \n",
- " Clinton Hill | \n",
- " 40.68514 | \n",
- " -73.95976 | \n",
- " United States | \n",
- " ... | \n",
- " $74 | \n",
- " 30.0 | \n",
- " 270.0 | \n",
- " 7/5/2019 | \n",
- " 4.64 | \n",
- " 4.0 | \n",
- " 1.0 | \n",
- " 322.0 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1003689 | \n",
- " Entire Apt: Spacious Studio/Loft by central park | \n",
- " 92037596077 | \n",
- " verified | \n",
- " Lyndon | \n",
- " Manhattan | \n",
- " East Harlem | \n",
- " 40.79851 | \n",
- " -73.94399 | \n",
- " United States | \n",
- " ... | \n",
- " $41 | \n",
- " 10.0 | \n",
- " 9.0 | \n",
- " 11/19/2018 | \n",
- " 0.10 | \n",
- " 3.0 | \n",
- " 1.0 | \n",
- " 289.0 | \n",
- " Please no smoking in the house, porch or on th... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 26 columns
\n",
- "
"
- ],
- "text/plain": [
- " id NAME host id \\\n",
- "0 1001254 Clean & quiet apt home by the park 80014485718 \n",
- "1 1002102 Skylit Midtown Castle 52335172823 \n",
- "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
- "3 1002755 NaN 85098326012 \n",
- "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
- "\n",
- " host_identity_verified host name neighbourhood group neighbourhood \\\n",
- "0 unconfirmed Madaline Brooklyn Kensington \n",
- "1 verified Jenna Manhattan Midtown \n",
- "2 NaN Elise Manhattan Harlem \n",
- "3 unconfirmed Garry Brooklyn Clinton Hill \n",
- "4 verified Lyndon Manhattan East Harlem \n",
- "\n",
- " lat long country ... service fee minimum nights \\\n",
- "0 40.64749 -73.97237 United States ... $193 10.0 \n",
- "1 40.75362 -73.98377 United States ... $28 30.0 \n",
- "2 40.80902 -73.94190 United States ... $124 3.0 \n",
- "3 40.68514 -73.95976 United States ... $74 30.0 \n",
- "4 40.79851 -73.94399 United States ... $41 10.0 \n",
- "\n",
- " number of reviews last review reviews per month review rate number \\\n",
- "0 9.0 10/19/2021 0.21 4.0 \n",
- "1 45.0 5/21/2022 0.38 4.0 \n",
- "2 0.0 NaN NaN 5.0 \n",
- "3 270.0 7/5/2019 4.64 4.0 \n",
- "4 9.0 11/19/2018 0.10 3.0 \n",
- "\n",
- " calculated host listings count availability 365 \\\n",
- "0 6.0 286.0 \n",
- "1 2.0 228.0 \n",
- "2 1.0 352.0 \n",
- "3 1.0 322.0 \n",
- "4 1.0 289.0 \n",
- "\n",
- " house_rules license \n",
- "0 Clean up and treat the home the way you'd like... NaN \n",
- "1 Pet friendly but please confirm with me if the... NaN \n",
- "2 I encourage you to use my kitchen, cooking and... NaN \n",
- "3 NaN NaN \n",
- "4 Please no smoking in the house, porch or on th... NaN \n",
- "\n",
- "[5 rows x 26 columns]"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv1.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "39d543be-013a-4976-942d-f9884274c7be",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " id | \n",
- " NAME | \n",
- " host id | \n",
- " host_identity_verified | \n",
- " host name | \n",
- " neighbourhood group | \n",
- " neighbourhood | \n",
- " lat | \n",
- " long | \n",
- " country | \n",
- " ... | \n",
- " service fee | \n",
- " minimum nights | \n",
- " number of reviews | \n",
- " last review | \n",
- " reviews per month | \n",
- " review rate number | \n",
- " calculated host listings count | \n",
- " availability 365 | \n",
- " house_rules | \n",
- " license | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1001254 | \n",
- " Clean & quiet apt home by the park | \n",
- " 80014485718 | \n",
- " unconfirmed | \n",
- " Madaline | \n",
- " Brooklyn | \n",
- " Kensington | \n",
- " 40.64749 | \n",
- " -73.97237 | \n",
- " United States | \n",
- " ... | \n",
- " $193 | \n",
- " 10.0 | \n",
- " 9.0 | \n",
- " 10/19/2021 | \n",
- " 0.21 | \n",
- " 4.0 | \n",
- " 6.0 | \n",
- " 286.0 | \n",
- " Clean up and treat the home the way you'd like... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1002102 | \n",
- " Skylit Midtown Castle | \n",
- " 52335172823 | \n",
- " verified | \n",
- " Jenna | \n",
- " Manhattan | \n",
- " Midtown | \n",
- " 40.75362 | \n",
- " -73.98377 | \n",
- " United States | \n",
- " ... | \n",
- " $28 | \n",
- " 30.0 | \n",
- " 45.0 | \n",
- " 5/21/2022 | \n",
- " 0.38 | \n",
- " 4.0 | \n",
- " 2.0 | \n",
- " 228.0 | \n",
- " Pet friendly but please confirm with me if the... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1002403 | \n",
- " THE VILLAGE OF HARLEM....NEW YORK ! | \n",
- " 78829239556 | \n",
- " NaN | \n",
- " Elise | \n",
- " Manhattan | \n",
- " Harlem | \n",
- " 40.80902 | \n",
- " -73.94190 | \n",
- " United States | \n",
- " ... | \n",
- " $124 | \n",
- " 3.0 | \n",
- " 0.0 | \n",
- " NaN | \n",
- " NaN | \n",
- " 5.0 | \n",
- " 1.0 | \n",
- " 352.0 | \n",
- " I encourage you to use my kitchen, cooking and... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1002755 | \n",
- " NaN | \n",
- " 85098326012 | \n",
- " unconfirmed | \n",
- " Garry | \n",
- " Brooklyn | \n",
- " Clinton Hill | \n",
- " 40.68514 | \n",
- " -73.95976 | \n",
- " United States | \n",
- " ... | \n",
- " $74 | \n",
- " 30.0 | \n",
- " 270.0 | \n",
- " 7/5/2019 | \n",
- " 4.64 | \n",
- " 4.0 | \n",
- " 1.0 | \n",
- " 322.0 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1003689 | \n",
- " Entire Apt: Spacious Studio/Loft by central park | \n",
- " 92037596077 | \n",
- " verified | \n",
- " Lyndon | \n",
- " Manhattan | \n",
- " East Harlem | \n",
- " 40.79851 | \n",
- " -73.94399 | \n",
- " United States | \n",
- " ... | \n",
- " $41 | \n",
- " 10.0 | \n",
- " 9.0 | \n",
- " 11/19/2018 | \n",
- " 0.10 | \n",
- " 3.0 | \n",
- " 1.0 | \n",
- " 289.0 | \n",
- " Please no smoking in the house, porch or on th... | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 26 columns
\n",
- "
"
- ],
- "text/plain": [
- " id NAME host id \\\n",
- "0 1001254 Clean & quiet apt home by the park 80014485718 \n",
- "1 1002102 Skylit Midtown Castle 52335172823 \n",
- "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
- "3 1002755 NaN 85098326012 \n",
- "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
- "\n",
- " host_identity_verified host name neighbourhood group neighbourhood \\\n",
- "0 unconfirmed Madaline Brooklyn Kensington \n",
- "1 verified Jenna Manhattan Midtown \n",
- "2 NaN Elise Manhattan Harlem \n",
- "3 unconfirmed Garry Brooklyn Clinton Hill \n",
- "4 verified Lyndon Manhattan East Harlem \n",
- "\n",
- " lat long country ... service fee minimum nights \\\n",
- "0 40.64749 -73.97237 United States ... $193 10.0 \n",
- "1 40.75362 -73.98377 United States ... $28 30.0 \n",
- "2 40.80902 -73.94190 United States ... $124 3.0 \n",
- "3 40.68514 -73.95976 United States ... $74 30.0 \n",
- "4 40.79851 -73.94399 United States ... $41 10.0 \n",
- "\n",
- " number of reviews last review reviews per month review rate number \\\n",
- "0 9.0 10/19/2021 0.21 4.0 \n",
- "1 45.0 5/21/2022 0.38 4.0 \n",
- "2 0.0 NaN NaN 5.0 \n",
- "3 270.0 7/5/2019 4.64 4.0 \n",
- "4 9.0 11/19/2018 0.10 3.0 \n",
- "\n",
- " calculated host listings count availability 365 \\\n",
- "0 6.0 286.0 \n",
- "1 2.0 228.0 \n",
- "2 1.0 352.0 \n",
- "3 1.0 322.0 \n",
- "4 1.0 289.0 \n",
- "\n",
- " house_rules license \n",
- "0 Clean up and treat the home the way you'd like... NaN \n",
- "1 Pet friendly but please confirm with me if the... NaN \n",
- "2 I encourage you to use my kitchen, cooking and... NaN \n",
- "3 NaN NaN \n",
- "4 Please no smoking in the house, porch or on th... NaN \n",
- "\n",
- "[5 rows x 26 columns]"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv1.head()"
+ "uber_df = pd.read_csv(filepath_or_buffer=uber, low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
- "id": "95f93b29-94be-4c93-9793-cf51c2ba2442",
- "metadata": {},
- "outputs": [],
- "source": [
- "csv02 = pd.read_csv(\"WAZE_REVIEWS.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " review_id | \n",
- " pseudo_author_id | \n",
- " author_name | \n",
- " review_text | \n",
- " review_rating | \n",
- " review_likes | \n",
- " author_app_version | \n",
- " review_timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 0 | \n",
- " 6caba53d-789d-4733-bad5-c7491daf80f2 | \n",
- " 152618553977019693742 | \n",
- " A Google user | \n",
- " Nice app need to add red light cam. | \n",
- " 5 | \n",
- " 0 | \n",
- " 0.99.2.3 | \n",
- " 2009-06-30 16:48:15 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1 | \n",
- " 30c15838-8b02-4dae-8f51-25905cb40b68 | \n",
- " 234382942865437071667 | \n",
- " A Google user | \n",
- " Really cool social app. Lots of potential to b... | \n",
- " 5 | \n",
- " 0 | \n",
- " 0.99.2.3 | \n",
- " 2009-06-30 16:58:43 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 2 | \n",
- " c090400e-f88f-4129-930d-a650f3163a11 | \n",
- " 174473604608358796368 | \n",
- " A Google user | \n",
- " I was all excited about this app (ehat a great... | \n",
- " 1 | \n",
- " 0 | \n",
- " 0.99.2.3 | \n",
- " 2009-06-30 17:08:33 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 3 | \n",
- " f6f37456-793b-4786-af6e-454a811361bf | \n",
- " 286593453219054880269 | \n",
- " A Google user | \n",
- " I love this app! Lol | \n",
- " 5 | \n",
- " 0 | \n",
- " 0.99.2.3 | \n",
- " 2009-06-30 17:37:22 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 4 | \n",
- " 8ae5d962-7c0c-476d-82fa-79f6e5484acc | \n",
- " 167276875678680630145 | \n",
- " A Google user | \n",
- " Great app i like the idea of your car being pa... | \n",
- " 4 | \n",
- " 0 | \n",
- " 0.99.2.3 | \n",
- " 2009-06-30 23:58:43 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0 review_id pseudo_author_id \\\n",
- "0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n",
- "1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n",
- "2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n",
- "3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n",
- "4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n",
- "\n",
- " author_name review_text \\\n",
- "0 A Google user Nice app need to add red light cam. \n",
- "1 A Google user Really cool social app. Lots of potential to b... \n",
- "2 A Google user I was all excited about this app (ehat a great... \n",
- "3 A Google user I love this app! Lol \n",
- "4 A Google user Great app i like the idea of your car being pa... \n",
- "\n",
- " review_rating review_likes author_app_version review_timestamp \n",
- "0 5 0 0.99.2.3 2009-06-30 16:48:15 \n",
- "1 5 0 0.99.2.3 2009-06-30 16:58:43 \n",
- "2 1 0 0.99.2.3 2009-06-30 17:08:33 \n",
- "3 5 0 0.99.2.3 2009-06-30 17:37:22 \n",
- "4 4 0 0.99.2.3 2009-06-30 23:58:43 "
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv02.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Unnamed: 0 | \n",
- " review_id | \n",
- " pseudo_author_id | \n",
- " author_name | \n",
- " review_text | \n",
- " review_rating | \n",
- " review_likes | \n",
- " author_app_version | \n",
- " review_timestamp | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 780068 | \n",
- " 780068 | \n",
- " 01655504-5a51-4c19-b313-2bd5fa3f253a | \n",
- " 680743620884748258838 | \n",
- " Ma********ll | \n",
- " Freezes | \n",
- " 3 | \n",
- " 0 | \n",
- " NaN | \n",
- " 2023-11-17 03:18:26 | \n",
- "
\n",
- " \n",
- " | 780069 | \n",
- " 780069 | \n",
- " f04306cb-af60-4a44-aebc-c37122620319 | \n",
- " 266638684561117704682 | \n",
- " Zu******el | \n",
- " To stuck | \n",
- " 1 | \n",
- " 0 | \n",
- " NaN | \n",
- " 2023-11-17 03:18:38 | \n",
- "
\n",
- " \n",
- " | 780070 | \n",
- " 780070 | \n",
- " 894e3c41-ca20-4781-9308-70eeb060a865 | \n",
- " 154572309081670894420 | \n",
- " br**********ji | \n",
- " racist made app | \n",
- " 1 | \n",
- " 0 | \n",
- " 4.99.0.2 | \n",
- " 2023-11-17 03:23:20 | \n",
- "
\n",
- " \n",
- " | 780071 | \n",
- " 780071 | \n",
- " 4fafb0b1-485e-473e-9bcd-d5c9848424d2 | \n",
- " 154995071911163107981 | \n",
- " Mo***********da | \n",
- " بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... | \n",
- " 5 | \n",
- " 0 | \n",
- " 4.99.1.1 | \n",
- " 2023-11-17 04:05:02 | \n",
- "
\n",
- " \n",
- " | 780072 | \n",
- " 780072 | \n",
- " d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 | \n",
- " 200574835524973617311 | \n",
- " Re***********iz | \n",
- " Best app ever used. | \n",
- " 5 | \n",
- " 0 | \n",
- " 4.99.0.2 | \n",
- " 2023-11-17 04:06:44 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Unnamed: 0 review_id \\\n",
- "780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n",
- "780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n",
- "780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n",
- "780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n",
- "780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n",
- "\n",
- " pseudo_author_id author_name \\\n",
- "780068 680743620884748258838 Ma********ll \n",
- "780069 266638684561117704682 Zu******el \n",
- "780070 154572309081670894420 br**********ji \n",
- "780071 154995071911163107981 Mo***********da \n",
- "780072 200574835524973617311 Re***********iz \n",
- "\n",
- " review_text review_rating \\\n",
- "780068 Freezes 3 \n",
- "780069 To stuck 1 \n",
- "780070 racist made app 1 \n",
- "780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n",
- "780072 Best app ever used. 5 \n",
- "\n",
- " review_likes author_app_version review_timestamp \n",
- "780068 0 NaN 2023-11-17 03:18:26 \n",
- "780069 0 NaN 2023-11-17 03:18:38 \n",
- "780070 0 4.99.0.2 2023-11-17 03:23:20 \n",
- "780071 0 4.99.1.1 2023-11-17 04:05:02 \n",
- "780072 0 4.99.0.2 2023-11-17 04:06:44 "
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "csv02.tail()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d",
+ "id": "9b8469b3-c606-461f-aaef-9619b7dc1ffd",
"metadata": {},
"outputs": [
{
@@ -1145,303 +189,141 @@
"4 4.486.10002 en in "
]
},
- "execution_count": 20,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "uber = pd.read_csv(\"Uber Customer Reviews.csv\", low_memory=False)\n",
- "uber.head()"
+ "uber_df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 24,
- "id": "d4ace5a2-346a-4099-9854-1cac2749a216",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(1069616, 13)\n"
- ]
- }
- ],
- "source": [
- "print(np.shape(uber))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c",
+ "execution_count": 15,
+ "id": "1709a2cc-4f7a-4e77-994e-68668612caff",
"metadata": {},
"outputs": [
{
"data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " source | \n",
- " review_id | \n",
- " user_name | \n",
- " review_title | \n",
- " review_description | \n",
- " rating | \n",
- " thumbs_up | \n",
- " review_date | \n",
- " developer_response | \n",
- " developer_response_date | \n",
- " appVersion | \n",
- " laguage_code | \n",
- " country_code | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " Google Play | \n",
- " fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 | \n",
- " Puipuii Ralte | \n",
- " NaN | \n",
- " The map in Ola is so messed up, i have to pay ... | \n",
- " 1 | \n",
- " 0.0 | \n",
- " 2023-08-10 16:40:50 | \n",
- " NaN | \n",
- " NaN | \n",
- " 6.3.2 | \n",
- " en | \n",
- " in | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " Google Play | \n",
- " 5a0051fb-220a-45b2-ba94-a15a2949218f | \n",
- " Deepak Kumar | \n",
- " NaN | \n",
- " Deepak Kumar.... 🙏🙏🙏🙏🙏] | \n",
- " 5 | \n",
- " 0.0 | \n",
- " 2023-08-10 16:36:14 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " en | \n",
- " in | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " Google Play | \n",
- " 71ebf933-b734-474d-bb65-a18c90906ed2 | \n",
- " Ahamed Azarudeen | \n",
- " NaN | \n",
- " Such aa irresponsible app more then I waiting ... | \n",
- " 1 | \n",
- " 0.0 | \n",
- " 2023-08-10 16:29:31 | \n",
- " NaN | \n",
- " NaN | \n",
- " 6.3.1 | \n",
- " en | \n",
- " in | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " Google Play | \n",
- " e1cc0010-60b3-4126-99c2-e8549088566a | \n",
- " Rahil Syed | \n",
- " NaN | \n",
- " Worst | \n",
- " 1 | \n",
- " 0.0 | \n",
- " 2023-08-10 15:52:06 | \n",
- " NaN | \n",
- " NaN | \n",
- " 5.0.4 | \n",
- " en | \n",
- " in | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " Google Play | \n",
- " 77cf1be1-b428-4493-ae25-e0f288f79b8f | \n",
- " vin 007 | \n",
- " NaN | \n",
- " Too much expensive .. try UBer... They are pro... | \n",
- " 1 | \n",
- " 0.0 | \n",
- " 2023-08-10 15:51:10 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " en | \n",
- " in | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
"text/plain": [
- " source review_id user_name \\\n",
- "0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n",
- "1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n",
- "2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n",
- "3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n",
- "4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n",
- "\n",
- " review_title review_description rating \\\n",
- "0 NaN The map in Ola is so messed up, i have to pay ... 1 \n",
- "1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n",
- "2 NaN Such aa irresponsible app more then I waiting ... 1 \n",
- "3 NaN Worst 1 \n",
- "4 NaN Too much expensive .. try UBer... They are pro... 1 \n",
- "\n",
- " thumbs_up review_date developer_response developer_response_date \\\n",
- "0 0.0 2023-08-10 16:40:50 NaN NaN \n",
- "1 0.0 2023-08-10 16:36:14 NaN NaN \n",
- "2 0.0 2023-08-10 16:29:31 NaN NaN \n",
- "3 0.0 2023-08-10 15:52:06 NaN NaN \n",
- "4 0.0 2023-08-10 15:51:10 NaN NaN \n",
- "\n",
- " appVersion laguage_code country_code \n",
- "0 6.3.2 en in \n",
- "1 NaN en in \n",
- "2 6.3.1 en in \n",
- "3 5.0.4 en in \n",
- "4 NaN en in "
+ "(1069616, 13)"
]
},
- "execution_count": 27,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ola_df = pd.read_csv(\"Ola Customer Reviews.csv\", low_memory=False)\n",
- "ola_df.head()"
+ "uber_df.shape"
]
},
{
"cell_type": "code",
- "execution_count": 28,
- "id": "878a39c4-45d5-41d6-82b0-9c373c28e280",
+ "execution_count": 17,
+ "id": "06c0c03c-14ba-4451-a6ea-44d36e85327c",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "count 357678.000000\n",
- "mean 92.402697\n",
- "std 125.489169\n",
- "min 1.000000\n",
- "25% 8.000000\n",
- "50% 33.000000\n",
- "75% 131.000000\n",
- "max 2877.000000\n",
- "Name: review_length, dtype: float64\n"
- ]
+ "data": {
+ "text/plain": [
+ "['source',\n",
+ " 'review_id',\n",
+ " 'user_name',\n",
+ " 'review_title',\n",
+ " 'review_description',\n",
+ " 'rating',\n",
+ " 'thumbs_up',\n",
+ " 'review_date',\n",
+ " 'developer_response',\n",
+ " 'developer_response_date',\n",
+ " 'appVersion',\n",
+ " 'laguage_code',\n",
+ " 'country_code']"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Check average review length\n",
- "ola_df['review_length'] = ola_df['review_description'].str.len()\n",
- "print(ola_df['review_length'].describe())\n",
- "\n"
+ "list(uber_df.columns)"
]
},
{
"cell_type": "code",
- "execution_count": 29,
- "id": "1dd032ba-343b-4402-9d96-ee5e0432ab07",
+ "execution_count": 19,
+ "id": "d22d3bce-eac0-4d02-a4ef-38343f4958ff",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Substantive reviews: 204715\n"
- ]
+ "data": {
+ "text/plain": [
+ "source object\n",
+ "review_id object\n",
+ "user_name object\n",
+ "review_title object\n",
+ "review_description object\n",
+ "rating int64\n",
+ "thumbs_up float64\n",
+ "review_date object\n",
+ "developer_response object\n",
+ "developer_response_date object\n",
+ "appVersion object\n",
+ "laguage_code object\n",
+ "country_code object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Filter out very short reviews\n",
- "substantive_reviews = ola_df[ola_df['review_length'] > 20]\n",
- "print(f\"Substantive reviews: {len(substantive_reviews)}\")"
+ "uber_df.dtypes"
]
},
{
"cell_type": "code",
- "execution_count": 30,
- "id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145",
+ "execution_count": 22,
+ "id": "e08f5eae-7921-4526-b8fd-29038c55e1bb",
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "count 1.069447e+06\n",
- "mean 7.023987e+01\n",
- "std 1.158196e+02\n",
- "min 1.000000e+00\n",
- "25% 8.000000e+00\n",
- "50% 2.100000e+01\n",
- "75% 7.800000e+01\n",
- "max 3.792000e+03\n",
- "Name: review_length, dtype: float64\n"
- ]
+ "data": {
+ "text/plain": [
+ "source 0\n",
+ "review_id 0\n",
+ "user_name 1\n",
+ "review_title 1067436\n",
+ "review_description 169\n",
+ "rating 0\n",
+ "thumbs_up 2180\n",
+ "review_date 0\n",
+ "developer_response 871352\n",
+ "developer_response_date 872338\n",
+ "appVersion 241548\n",
+ "laguage_code 0\n",
+ "country_code 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "# Check average review length\n",
- "uber['review_length'] = uber['review_description'].str.len()\n",
- "print(uber['review_length'].describe())\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "id": "2dd05939-e87c-443d-9012-e5f45cf64ff5",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Substantive reviews: 542110\n"
- ]
- }
- ],
- "source": [
- "# Filter out very short reviews\n",
- "substantive_reviews = uber[uber['review_length'] > 20]\n",
- "print(f\"Substantive reviews: {len(substantive_reviews)}\")"
+ "uber_df.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
+ "id": "ea59d211-9958-46f6-bf76-65d8d36c50e4",
"metadata": {},
"outputs": [],
"source": []