1472 lines
52 KiB
Plaintext
1472 lines
52 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"csv0 = pd.read_csv(\"spotify.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "c0631560-c1be-4bbf-b050-b6a552e74d63",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Time_submitted</th>\n",
|
||
" <th>Review</th>\n",
|
||
" <th>Rating</th>\n",
|
||
" <th>Total_thumbsup</th>\n",
|
||
" <th>Reply</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2022-07-09 15:00:00</td>\n",
|
||
" <td>Great music service, the audio is high quality...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2022-07-09 14:21:22</td>\n",
|
||
" <td>Please ignore previous negative rating. This a...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2022-07-09 13:27:32</td>\n",
|
||
" <td>This pop-up \"Get the best Spotify experience o...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>2022-07-09 13:26:45</td>\n",
|
||
" <td>Really buggy and terrible to use as of recently</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>2022-07-09 13:20:49</td>\n",
|
||
" <td>Dear Spotify why do I get songs that I didn't ...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Time_submitted Review \\\n",
|
||
"0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n",
|
||
"1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n",
|
||
"2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n",
|
||
"3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n",
|
||
"4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n",
|
||
"\n",
|
||
" Rating Total_thumbsup Reply \n",
|
||
"0 5 2 NaN \n",
|
||
"1 5 1 NaN \n",
|
||
"2 4 0 NaN \n",
|
||
"3 1 1 NaN \n",
|
||
"4 1 1 NaN "
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv0.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Time_submitted</th>\n",
|
||
" <th>Review</th>\n",
|
||
" <th>Rating</th>\n",
|
||
" <th>Total_thumbsup</th>\n",
|
||
" <th>Reply</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>61589</th>\n",
|
||
" <td>2022-01-01 03:01:29</td>\n",
|
||
" <td>Even though it was communicated that lyrics fe...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61590</th>\n",
|
||
" <td>2022-01-01 02:13:40</td>\n",
|
||
" <td>Use to be sooo good back when I had it, and wh...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61591</th>\n",
|
||
" <td>2022-01-01 01:02:29</td>\n",
|
||
" <td>This app would be good if not for it taking ov...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61592</th>\n",
|
||
" <td>2022-01-01 00:49:23</td>\n",
|
||
" <td>The app is good hard to navigate and won't jus...</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>61593</th>\n",
|
||
" <td>2022-01-01 00:19:09</td>\n",
|
||
" <td>Its good but sometimes it doesnt load the musi...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Time_submitted Review \\\n",
|
||
"61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n",
|
||
"61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n",
|
||
"61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n",
|
||
"61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n",
|
||
"61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n",
|
||
"\n",
|
||
" Rating Total_thumbsup Reply \n",
|
||
"61589 1 6 NaN \n",
|
||
"61590 1 0 NaN \n",
|
||
"61591 2 10 NaN \n",
|
||
"61592 2 1 NaN \n",
|
||
"61593 4 0 NaN "
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv0.tail()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"csv1 = pd.read_csv(\"Airbnb_Open_Data.csv\", low_memory=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "1efba903-5004-4d7b-a1ee-42f333111055",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>NAME</th>\n",
|
||
" <th>host id</th>\n",
|
||
" <th>host_identity_verified</th>\n",
|
||
" <th>host name</th>\n",
|
||
" <th>neighbourhood group</th>\n",
|
||
" <th>neighbourhood</th>\n",
|
||
" <th>lat</th>\n",
|
||
" <th>long</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>service fee</th>\n",
|
||
" <th>minimum nights</th>\n",
|
||
" <th>number of reviews</th>\n",
|
||
" <th>last review</th>\n",
|
||
" <th>reviews per month</th>\n",
|
||
" <th>review rate number</th>\n",
|
||
" <th>calculated host listings count</th>\n",
|
||
" <th>availability 365</th>\n",
|
||
" <th>house_rules</th>\n",
|
||
" <th>license</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1001254</td>\n",
|
||
" <td>Clean & quiet apt home by the park</td>\n",
|
||
" <td>80014485718</td>\n",
|
||
" <td>unconfirmed</td>\n",
|
||
" <td>Madaline</td>\n",
|
||
" <td>Brooklyn</td>\n",
|
||
" <td>Kensington</td>\n",
|
||
" <td>40.64749</td>\n",
|
||
" <td>-73.97237</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$193</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>10/19/2021</td>\n",
|
||
" <td>0.21</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>286.0</td>\n",
|
||
" <td>Clean up and treat the home the way you'd like...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1002102</td>\n",
|
||
" <td>Skylit Midtown Castle</td>\n",
|
||
" <td>52335172823</td>\n",
|
||
" <td>verified</td>\n",
|
||
" <td>Jenna</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>Midtown</td>\n",
|
||
" <td>40.75362</td>\n",
|
||
" <td>-73.98377</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$28</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>45.0</td>\n",
|
||
" <td>5/21/2022</td>\n",
|
||
" <td>0.38</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>228.0</td>\n",
|
||
" <td>Pet friendly but please confirm with me if the...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1002403</td>\n",
|
||
" <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
|
||
" <td>78829239556</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Elise</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>Harlem</td>\n",
|
||
" <td>40.80902</td>\n",
|
||
" <td>-73.94190</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$124</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>352.0</td>\n",
|
||
" <td>I encourage you to use my kitchen, cooking and...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1002755</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>85098326012</td>\n",
|
||
" <td>unconfirmed</td>\n",
|
||
" <td>Garry</td>\n",
|
||
" <td>Brooklyn</td>\n",
|
||
" <td>Clinton Hill</td>\n",
|
||
" <td>40.68514</td>\n",
|
||
" <td>-73.95976</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$74</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>270.0</td>\n",
|
||
" <td>7/5/2019</td>\n",
|
||
" <td>4.64</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>322.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1003689</td>\n",
|
||
" <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
|
||
" <td>92037596077</td>\n",
|
||
" <td>verified</td>\n",
|
||
" <td>Lyndon</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>East Harlem</td>\n",
|
||
" <td>40.79851</td>\n",
|
||
" <td>-73.94399</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$41</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>11/19/2018</td>\n",
|
||
" <td>0.10</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>289.0</td>\n",
|
||
" <td>Please no smoking in the house, porch or on th...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 26 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id NAME host id \\\n",
|
||
"0 1001254 Clean & quiet apt home by the park 80014485718 \n",
|
||
"1 1002102 Skylit Midtown Castle 52335172823 \n",
|
||
"2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
|
||
"3 1002755 NaN 85098326012 \n",
|
||
"4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
|
||
"\n",
|
||
" host_identity_verified host name neighbourhood group neighbourhood \\\n",
|
||
"0 unconfirmed Madaline Brooklyn Kensington \n",
|
||
"1 verified Jenna Manhattan Midtown \n",
|
||
"2 NaN Elise Manhattan Harlem \n",
|
||
"3 unconfirmed Garry Brooklyn Clinton Hill \n",
|
||
"4 verified Lyndon Manhattan East Harlem \n",
|
||
"\n",
|
||
" lat long country ... service fee minimum nights \\\n",
|
||
"0 40.64749 -73.97237 United States ... $193 10.0 \n",
|
||
"1 40.75362 -73.98377 United States ... $28 30.0 \n",
|
||
"2 40.80902 -73.94190 United States ... $124 3.0 \n",
|
||
"3 40.68514 -73.95976 United States ... $74 30.0 \n",
|
||
"4 40.79851 -73.94399 United States ... $41 10.0 \n",
|
||
"\n",
|
||
" number of reviews last review reviews per month review rate number \\\n",
|
||
"0 9.0 10/19/2021 0.21 4.0 \n",
|
||
"1 45.0 5/21/2022 0.38 4.0 \n",
|
||
"2 0.0 NaN NaN 5.0 \n",
|
||
"3 270.0 7/5/2019 4.64 4.0 \n",
|
||
"4 9.0 11/19/2018 0.10 3.0 \n",
|
||
"\n",
|
||
" calculated host listings count availability 365 \\\n",
|
||
"0 6.0 286.0 \n",
|
||
"1 2.0 228.0 \n",
|
||
"2 1.0 352.0 \n",
|
||
"3 1.0 322.0 \n",
|
||
"4 1.0 289.0 \n",
|
||
"\n",
|
||
" house_rules license \n",
|
||
"0 Clean up and treat the home the way you'd like... NaN \n",
|
||
"1 Pet friendly but please confirm with me if the... NaN \n",
|
||
"2 I encourage you to use my kitchen, cooking and... NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 Please no smoking in the house, porch or on th... NaN \n",
|
||
"\n",
|
||
"[5 rows x 26 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv1.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "39d543be-013a-4976-942d-f9884274c7be",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>NAME</th>\n",
|
||
" <th>host id</th>\n",
|
||
" <th>host_identity_verified</th>\n",
|
||
" <th>host name</th>\n",
|
||
" <th>neighbourhood group</th>\n",
|
||
" <th>neighbourhood</th>\n",
|
||
" <th>lat</th>\n",
|
||
" <th>long</th>\n",
|
||
" <th>country</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>service fee</th>\n",
|
||
" <th>minimum nights</th>\n",
|
||
" <th>number of reviews</th>\n",
|
||
" <th>last review</th>\n",
|
||
" <th>reviews per month</th>\n",
|
||
" <th>review rate number</th>\n",
|
||
" <th>calculated host listings count</th>\n",
|
||
" <th>availability 365</th>\n",
|
||
" <th>house_rules</th>\n",
|
||
" <th>license</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1001254</td>\n",
|
||
" <td>Clean & quiet apt home by the park</td>\n",
|
||
" <td>80014485718</td>\n",
|
||
" <td>unconfirmed</td>\n",
|
||
" <td>Madaline</td>\n",
|
||
" <td>Brooklyn</td>\n",
|
||
" <td>Kensington</td>\n",
|
||
" <td>40.64749</td>\n",
|
||
" <td>-73.97237</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$193</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>10/19/2021</td>\n",
|
||
" <td>0.21</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>286.0</td>\n",
|
||
" <td>Clean up and treat the home the way you'd like...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1002102</td>\n",
|
||
" <td>Skylit Midtown Castle</td>\n",
|
||
" <td>52335172823</td>\n",
|
||
" <td>verified</td>\n",
|
||
" <td>Jenna</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>Midtown</td>\n",
|
||
" <td>40.75362</td>\n",
|
||
" <td>-73.98377</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$28</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>45.0</td>\n",
|
||
" <td>5/21/2022</td>\n",
|
||
" <td>0.38</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>228.0</td>\n",
|
||
" <td>Pet friendly but please confirm with me if the...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1002403</td>\n",
|
||
" <td>THE VILLAGE OF HARLEM....NEW YORK !</td>\n",
|
||
" <td>78829239556</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Elise</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>Harlem</td>\n",
|
||
" <td>40.80902</td>\n",
|
||
" <td>-73.94190</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$124</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>352.0</td>\n",
|
||
" <td>I encourage you to use my kitchen, cooking and...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1002755</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>85098326012</td>\n",
|
||
" <td>unconfirmed</td>\n",
|
||
" <td>Garry</td>\n",
|
||
" <td>Brooklyn</td>\n",
|
||
" <td>Clinton Hill</td>\n",
|
||
" <td>40.68514</td>\n",
|
||
" <td>-73.95976</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$74</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>270.0</td>\n",
|
||
" <td>7/5/2019</td>\n",
|
||
" <td>4.64</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>322.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1003689</td>\n",
|
||
" <td>Entire Apt: Spacious Studio/Loft by central park</td>\n",
|
||
" <td>92037596077</td>\n",
|
||
" <td>verified</td>\n",
|
||
" <td>Lyndon</td>\n",
|
||
" <td>Manhattan</td>\n",
|
||
" <td>East Harlem</td>\n",
|
||
" <td>40.79851</td>\n",
|
||
" <td>-73.94399</td>\n",
|
||
" <td>United States</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>$41</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>11/19/2018</td>\n",
|
||
" <td>0.10</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>289.0</td>\n",
|
||
" <td>Please no smoking in the house, porch or on th...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 26 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id NAME host id \\\n",
|
||
"0 1001254 Clean & quiet apt home by the park 80014485718 \n",
|
||
"1 1002102 Skylit Midtown Castle 52335172823 \n",
|
||
"2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
|
||
"3 1002755 NaN 85098326012 \n",
|
||
"4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
|
||
"\n",
|
||
" host_identity_verified host name neighbourhood group neighbourhood \\\n",
|
||
"0 unconfirmed Madaline Brooklyn Kensington \n",
|
||
"1 verified Jenna Manhattan Midtown \n",
|
||
"2 NaN Elise Manhattan Harlem \n",
|
||
"3 unconfirmed Garry Brooklyn Clinton Hill \n",
|
||
"4 verified Lyndon Manhattan East Harlem \n",
|
||
"\n",
|
||
" lat long country ... service fee minimum nights \\\n",
|
||
"0 40.64749 -73.97237 United States ... $193 10.0 \n",
|
||
"1 40.75362 -73.98377 United States ... $28 30.0 \n",
|
||
"2 40.80902 -73.94190 United States ... $124 3.0 \n",
|
||
"3 40.68514 -73.95976 United States ... $74 30.0 \n",
|
||
"4 40.79851 -73.94399 United States ... $41 10.0 \n",
|
||
"\n",
|
||
" number of reviews last review reviews per month review rate number \\\n",
|
||
"0 9.0 10/19/2021 0.21 4.0 \n",
|
||
"1 45.0 5/21/2022 0.38 4.0 \n",
|
||
"2 0.0 NaN NaN 5.0 \n",
|
||
"3 270.0 7/5/2019 4.64 4.0 \n",
|
||
"4 9.0 11/19/2018 0.10 3.0 \n",
|
||
"\n",
|
||
" calculated host listings count availability 365 \\\n",
|
||
"0 6.0 286.0 \n",
|
||
"1 2.0 228.0 \n",
|
||
"2 1.0 352.0 \n",
|
||
"3 1.0 322.0 \n",
|
||
"4 1.0 289.0 \n",
|
||
"\n",
|
||
" house_rules license \n",
|
||
"0 Clean up and treat the home the way you'd like... NaN \n",
|
||
"1 Pet friendly but please confirm with me if the... NaN \n",
|
||
"2 I encourage you to use my kitchen, cooking and... NaN \n",
|
||
"3 NaN NaN \n",
|
||
"4 Please no smoking in the house, porch or on th... NaN \n",
|
||
"\n",
|
||
"[5 rows x 26 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv1.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"id": "95f93b29-94be-4c93-9793-cf51c2ba2442",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"csv02 = pd.read_csv(\"WAZE_REVIEWS.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>review_id</th>\n",
|
||
" <th>pseudo_author_id</th>\n",
|
||
" <th>author_name</th>\n",
|
||
" <th>review_text</th>\n",
|
||
" <th>review_rating</th>\n",
|
||
" <th>review_likes</th>\n",
|
||
" <th>author_app_version</th>\n",
|
||
" <th>review_timestamp</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>0</td>\n",
|
||
" <td>6caba53d-789d-4733-bad5-c7491daf80f2</td>\n",
|
||
" <td>152618553977019693742</td>\n",
|
||
" <td>A Google user</td>\n",
|
||
" <td>Nice app need to add red light cam.</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.99.2.3</td>\n",
|
||
" <td>2009-06-30 16:48:15</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>30c15838-8b02-4dae-8f51-25905cb40b68</td>\n",
|
||
" <td>234382942865437071667</td>\n",
|
||
" <td>A Google user</td>\n",
|
||
" <td>Really cool social app. Lots of potential to b...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.99.2.3</td>\n",
|
||
" <td>2009-06-30 16:58:43</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>c090400e-f88f-4129-930d-a650f3163a11</td>\n",
|
||
" <td>174473604608358796368</td>\n",
|
||
" <td>A Google user</td>\n",
|
||
" <td>I was all excited about this app (ehat a great...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.99.2.3</td>\n",
|
||
" <td>2009-06-30 17:08:33</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>f6f37456-793b-4786-af6e-454a811361bf</td>\n",
|
||
" <td>286593453219054880269</td>\n",
|
||
" <td>A Google user</td>\n",
|
||
" <td>I love this app! Lol</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.99.2.3</td>\n",
|
||
" <td>2009-06-30 17:37:22</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>8ae5d962-7c0c-476d-82fa-79f6e5484acc</td>\n",
|
||
" <td>167276875678680630145</td>\n",
|
||
" <td>A Google user</td>\n",
|
||
" <td>Great app i like the idea of your car being pa...</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0.99.2.3</td>\n",
|
||
" <td>2009-06-30 23:58:43</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 review_id pseudo_author_id \\\n",
|
||
"0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n",
|
||
"1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n",
|
||
"2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n",
|
||
"3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n",
|
||
"4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n",
|
||
"\n",
|
||
" author_name review_text \\\n",
|
||
"0 A Google user Nice app need to add red light cam. \n",
|
||
"1 A Google user Really cool social app. Lots of potential to b... \n",
|
||
"2 A Google user I was all excited about this app (ehat a great... \n",
|
||
"3 A Google user I love this app! Lol \n",
|
||
"4 A Google user Great app i like the idea of your car being pa... \n",
|
||
"\n",
|
||
" review_rating review_likes author_app_version review_timestamp \n",
|
||
"0 5 0 0.99.2.3 2009-06-30 16:48:15 \n",
|
||
"1 5 0 0.99.2.3 2009-06-30 16:58:43 \n",
|
||
"2 1 0 0.99.2.3 2009-06-30 17:08:33 \n",
|
||
"3 5 0 0.99.2.3 2009-06-30 17:37:22 \n",
|
||
"4 4 0 0.99.2.3 2009-06-30 23:58:43 "
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv02.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Unnamed: 0</th>\n",
|
||
" <th>review_id</th>\n",
|
||
" <th>pseudo_author_id</th>\n",
|
||
" <th>author_name</th>\n",
|
||
" <th>review_text</th>\n",
|
||
" <th>review_rating</th>\n",
|
||
" <th>review_likes</th>\n",
|
||
" <th>author_app_version</th>\n",
|
||
" <th>review_timestamp</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>780068</th>\n",
|
||
" <td>780068</td>\n",
|
||
" <td>01655504-5a51-4c19-b313-2bd5fa3f253a</td>\n",
|
||
" <td>680743620884748258838</td>\n",
|
||
" <td>Ma********ll</td>\n",
|
||
" <td>Freezes</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-11-17 03:18:26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>780069</th>\n",
|
||
" <td>780069</td>\n",
|
||
" <td>f04306cb-af60-4a44-aebc-c37122620319</td>\n",
|
||
" <td>266638684561117704682</td>\n",
|
||
" <td>Zu******el</td>\n",
|
||
" <td>To stuck</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2023-11-17 03:18:38</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>780070</th>\n",
|
||
" <td>780070</td>\n",
|
||
" <td>894e3c41-ca20-4781-9308-70eeb060a865</td>\n",
|
||
" <td>154572309081670894420</td>\n",
|
||
" <td>br**********ji</td>\n",
|
||
" <td>racist made app</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4.99.0.2</td>\n",
|
||
" <td>2023-11-17 03:23:20</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>780071</th>\n",
|
||
" <td>780071</td>\n",
|
||
" <td>4fafb0b1-485e-473e-9bcd-d5c9848424d2</td>\n",
|
||
" <td>154995071911163107981</td>\n",
|
||
" <td>Mo***********da</td>\n",
|
||
" <td>بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4.99.1.1</td>\n",
|
||
" <td>2023-11-17 04:05:02</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>780072</th>\n",
|
||
" <td>780072</td>\n",
|
||
" <td>d1570ba0-ffc5-4fc6-8d34-12daba4b38e2</td>\n",
|
||
" <td>200574835524973617311</td>\n",
|
||
" <td>Re***********iz</td>\n",
|
||
" <td>Best app ever used.</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>4.99.0.2</td>\n",
|
||
" <td>2023-11-17 04:06:44</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Unnamed: 0 review_id \\\n",
|
||
"780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n",
|
||
"780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n",
|
||
"780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n",
|
||
"780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n",
|
||
"780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n",
|
||
"\n",
|
||
" pseudo_author_id author_name \\\n",
|
||
"780068 680743620884748258838 Ma********ll \n",
|
||
"780069 266638684561117704682 Zu******el \n",
|
||
"780070 154572309081670894420 br**********ji \n",
|
||
"780071 154995071911163107981 Mo***********da \n",
|
||
"780072 200574835524973617311 Re***********iz \n",
|
||
"\n",
|
||
" review_text review_rating \\\n",
|
||
"780068 Freezes 3 \n",
|
||
"780069 To stuck 1 \n",
|
||
"780070 racist made app 1 \n",
|
||
"780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n",
|
||
"780072 Best app ever used. 5 \n",
|
||
"\n",
|
||
" review_likes author_app_version review_timestamp \n",
|
||
"780068 0 NaN 2023-11-17 03:18:26 \n",
|
||
"780069 0 NaN 2023-11-17 03:18:38 \n",
|
||
"780070 0 4.99.0.2 2023-11-17 03:23:20 \n",
|
||
"780071 0 4.99.1.1 2023-11-17 04:05:02 \n",
|
||
"780072 0 4.99.0.2 2023-11-17 04:06:44 "
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"csv02.tail()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>source</th>\n",
|
||
" <th>review_id</th>\n",
|
||
" <th>user_name</th>\n",
|
||
" <th>review_title</th>\n",
|
||
" <th>review_description</th>\n",
|
||
" <th>rating</th>\n",
|
||
" <th>thumbs_up</th>\n",
|
||
" <th>review_date</th>\n",
|
||
" <th>developer_response</th>\n",
|
||
" <th>developer_response_date</th>\n",
|
||
" <th>appVersion</th>\n",
|
||
" <th>laguage_code</th>\n",
|
||
" <th>country_code</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>18d6584c-d0e9-4833-a744-f607058aee97</td>\n",
|
||
" <td>Milky Way</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Suddenly, the driver can't have my location an...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 17:48:51</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>50a08f18-cece-4ddf-b617-028844c8aa28</td>\n",
|
||
" <td>Bradlee Severa</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Very cordial.. And helped with a quick turnaro...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 17:38:35</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.485.10000</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7</td>\n",
|
||
" <td>Amit Aggarwal</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Very good experience</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 17:38:17</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.486.10002</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>502702a9-25ed-4373-a96c-7fa1f06caacd</td>\n",
|
||
" <td>Bryant Inman</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>All I use</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 17:37:45</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.467.10008</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>f47a3fb6-23db-49bd-9e63-f33c8d724d07</td>\n",
|
||
" <td>Addie Whittaker</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>I have enjoyed traveling by Uber my drivers ha...</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 17:36:56</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4.486.10002</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" source review_id user_name \\\n",
|
||
"0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n",
|
||
"1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n",
|
||
"2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n",
|
||
"3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n",
|
||
"4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n",
|
||
"\n",
|
||
" review_title review_description rating \\\n",
|
||
"0 NaN Suddenly, the driver can't have my location an... 1 \n",
|
||
"1 NaN Very cordial.. And helped with a quick turnaro... 5 \n",
|
||
"2 NaN Very good experience 5 \n",
|
||
"3 NaN All I use 5 \n",
|
||
"4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n",
|
||
"\n",
|
||
" thumbs_up review_date developer_response developer_response_date \\\n",
|
||
"0 0.0 2023-08-10 17:48:51 NaN NaN \n",
|
||
"1 0.0 2023-08-10 17:38:35 NaN NaN \n",
|
||
"2 0.0 2023-08-10 17:38:17 NaN NaN \n",
|
||
"3 0.0 2023-08-10 17:37:45 NaN NaN \n",
|
||
"4 0.0 2023-08-10 17:36:56 NaN NaN \n",
|
||
"\n",
|
||
" appVersion laguage_code country_code \n",
|
||
"0 NaN en in \n",
|
||
"1 4.485.10000 en in \n",
|
||
"2 4.486.10002 en in \n",
|
||
"3 4.467.10008 en in \n",
|
||
"4 4.486.10002 en in "
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"uber = pd.read_csv(\"Uber Customer Reviews.csv\", low_memory=False)\n",
|
||
"uber.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"id": "d4ace5a2-346a-4099-9854-1cac2749a216",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(1069616, 13)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(np.shape(uber))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>source</th>\n",
|
||
" <th>review_id</th>\n",
|
||
" <th>user_name</th>\n",
|
||
" <th>review_title</th>\n",
|
||
" <th>review_description</th>\n",
|
||
" <th>rating</th>\n",
|
||
" <th>thumbs_up</th>\n",
|
||
" <th>review_date</th>\n",
|
||
" <th>developer_response</th>\n",
|
||
" <th>developer_response_date</th>\n",
|
||
" <th>appVersion</th>\n",
|
||
" <th>laguage_code</th>\n",
|
||
" <th>country_code</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>fbc7ffc9-5a89-446e-87fd-d69bf4a7f984</td>\n",
|
||
" <td>Puipuii Ralte</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>The map in Ola is so messed up, i have to pay ...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 16:40:50</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6.3.2</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>5a0051fb-220a-45b2-ba94-a15a2949218f</td>\n",
|
||
" <td>Deepak Kumar</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Deepak Kumar.... 🙏🙏🙏🙏🙏]</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 16:36:14</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>71ebf933-b734-474d-bb65-a18c90906ed2</td>\n",
|
||
" <td>Ahamed Azarudeen</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Such aa irresponsible app more then I waiting ...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 16:29:31</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6.3.1</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>e1cc0010-60b3-4126-99c2-e8549088566a</td>\n",
|
||
" <td>Rahil Syed</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Worst</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 15:52:06</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5.0.4</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Google Play</td>\n",
|
||
" <td>77cf1be1-b428-4493-ae25-e0f288f79b8f</td>\n",
|
||
" <td>vin 007</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Too much expensive .. try UBer... They are pro...</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2023-08-10 15:51:10</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>en</td>\n",
|
||
" <td>in</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" source review_id user_name \\\n",
|
||
"0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n",
|
||
"1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n",
|
||
"2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n",
|
||
"3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n",
|
||
"4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n",
|
||
"\n",
|
||
" review_title review_description rating \\\n",
|
||
"0 NaN The map in Ola is so messed up, i have to pay ... 1 \n",
|
||
"1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n",
|
||
"2 NaN Such aa irresponsible app more then I waiting ... 1 \n",
|
||
"3 NaN Worst 1 \n",
|
||
"4 NaN Too much expensive .. try UBer... They are pro... 1 \n",
|
||
"\n",
|
||
" thumbs_up review_date developer_response developer_response_date \\\n",
|
||
"0 0.0 2023-08-10 16:40:50 NaN NaN \n",
|
||
"1 0.0 2023-08-10 16:36:14 NaN NaN \n",
|
||
"2 0.0 2023-08-10 16:29:31 NaN NaN \n",
|
||
"3 0.0 2023-08-10 15:52:06 NaN NaN \n",
|
||
"4 0.0 2023-08-10 15:51:10 NaN NaN \n",
|
||
"\n",
|
||
" appVersion laguage_code country_code \n",
|
||
"0 6.3.2 en in \n",
|
||
"1 NaN en in \n",
|
||
"2 6.3.1 en in \n",
|
||
"3 5.0.4 en in \n",
|
||
"4 NaN en in "
|
||
]
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ola_df = pd.read_csv(\"Ola Customer Reviews.csv\", low_memory=False)\n",
|
||
"ola_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"id": "878a39c4-45d5-41d6-82b0-9c373c28e280",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 357678.000000\n",
|
||
"mean 92.402697\n",
|
||
"std 125.489169\n",
|
||
"min 1.000000\n",
|
||
"25% 8.000000\n",
|
||
"50% 33.000000\n",
|
||
"75% 131.000000\n",
|
||
"max 2877.000000\n",
|
||
"Name: review_length, dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Check average review length\n",
|
||
"ola_df['review_length'] = ola_df['review_description'].str.len()\n",
|
||
"print(ola_df['review_length'].describe())\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"id": "1dd032ba-343b-4402-9d96-ee5e0432ab07",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Substantive reviews: 204715\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Filter out very short reviews\n",
|
||
"substantive_reviews = ola_df[ola_df['review_length'] > 20]\n",
|
||
"print(f\"Substantive reviews: {len(substantive_reviews)}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"count 1.069447e+06\n",
|
||
"mean 7.023987e+01\n",
|
||
"std 1.158196e+02\n",
|
||
"min 1.000000e+00\n",
|
||
"25% 8.000000e+00\n",
|
||
"50% 2.100000e+01\n",
|
||
"75% 7.800000e+01\n",
|
||
"max 3.792000e+03\n",
|
||
"Name: review_length, dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Check average review length\n",
|
||
"uber['review_length'] = uber['review_description'].str.len()\n",
|
||
"print(uber['review_length'].describe())\n",
|
||
"\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"id": "2dd05939-e87c-443d-9012-e5f45cf64ff5",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Substantive reviews: 542110\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Filter out very short reviews\n",
|
||
"substantive_reviews = uber[uber['review_length'] > 20]\n",
|
||
"print(f\"Substantive reviews: {len(substantive_reviews)}\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|