{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a",
"metadata": {},
"outputs": [],
"source": [
"# Load the Spotify app reviews into a DataFrame\n",
"csv0 = pd.read_csv(filepath_or_buffer=\"spotify.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c0631560-c1be-4bbf-b050-b6a552e74d63",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time_submitted | \n",
" Review | \n",
" Rating | \n",
" Total_thumbsup | \n",
" Reply | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 2022-07-09 15:00:00 | \n",
" Great music service, the audio is high quality... | \n",
" 5 | \n",
" 2 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" 2022-07-09 14:21:22 | \n",
" Please ignore previous negative rating. This a... | \n",
" 5 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" 2022-07-09 13:27:32 | \n",
" This pop-up \"Get the best Spotify experience o... | \n",
" 4 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" 2022-07-09 13:26:45 | \n",
" Really buggy and terrible to use as of recently | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
" | 4 | \n",
" 2022-07-09 13:20:49 | \n",
" Dear Spotify why do I get songs that I didn't ... | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time_submitted Review \\\n",
"0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n",
"1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n",
"2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n",
"3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n",
"4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n",
"\n",
" Rating Total_thumbsup Reply \n",
"0 5 2 NaN \n",
"1 5 1 NaN \n",
"2 4 0 NaN \n",
"3 1 1 NaN \n",
"4 1 1 NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the Spotify reviews\n",
"csv0.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Time_submitted | \n",
" Review | \n",
" Rating | \n",
" Total_thumbsup | \n",
" Reply | \n",
"
\n",
" \n",
" \n",
" \n",
" | 61589 | \n",
" 2022-01-01 03:01:29 | \n",
" Even though it was communicated that lyrics fe... | \n",
" 1 | \n",
" 6 | \n",
" NaN | \n",
"
\n",
" \n",
" | 61590 | \n",
" 2022-01-01 02:13:40 | \n",
" Use to be sooo good back when I had it, and wh... | \n",
" 1 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 61591 | \n",
" 2022-01-01 01:02:29 | \n",
" This app would be good if not for it taking ov... | \n",
" 2 | \n",
" 10 | \n",
" NaN | \n",
"
\n",
" \n",
" | 61592 | \n",
" 2022-01-01 00:49:23 | \n",
" The app is good hard to navigate and won't jus... | \n",
" 2 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
" | 61593 | \n",
" 2022-01-01 00:19:09 | \n",
" Its good but sometimes it doesnt load the musi... | \n",
" 4 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Time_submitted Review \\\n",
"61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n",
"61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n",
"61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n",
"61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n",
"61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n",
"\n",
" Rating Total_thumbsup Reply \n",
"61589 1 6 NaN \n",
"61590 1 0 NaN \n",
"61591 2 10 NaN \n",
"61592 2 1 NaN \n",
"61593 4 0 NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the last five rows of the Spotify reviews\n",
"csv0.tail(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6",
"metadata": {},
"outputs": [],
"source": [
"# Load the Airbnb listings; low_memory=False parses the file in one pass\n",
"# so column dtypes are inferred from the whole column rather than per chunk.\n",
"csv1 = pd.read_csv(filepath_or_buffer=\"Airbnb_Open_Data.csv\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1efba903-5004-4d7b-a1ee-42f333111055",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" NAME | \n",
" host id | \n",
" host_identity_verified | \n",
" host name | \n",
" neighbourhood group | \n",
" neighbourhood | \n",
" lat | \n",
" long | \n",
" country | \n",
" ... | \n",
" service fee | \n",
" minimum nights | \n",
" number of reviews | \n",
" last review | \n",
" reviews per month | \n",
" review rate number | \n",
" calculated host listings count | \n",
" availability 365 | \n",
" house_rules | \n",
" license | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1001254 | \n",
" Clean & quiet apt home by the park | \n",
" 80014485718 | \n",
" unconfirmed | \n",
" Madaline | \n",
" Brooklyn | \n",
" Kensington | \n",
" 40.64749 | \n",
" -73.97237 | \n",
" United States | \n",
" ... | \n",
" $193 | \n",
" 10.0 | \n",
" 9.0 | \n",
" 10/19/2021 | \n",
" 0.21 | \n",
" 4.0 | \n",
" 6.0 | \n",
" 286.0 | \n",
" Clean up and treat the home the way you'd like... | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" 1002102 | \n",
" Skylit Midtown Castle | \n",
" 52335172823 | \n",
" verified | \n",
" Jenna | \n",
" Manhattan | \n",
" Midtown | \n",
" 40.75362 | \n",
" -73.98377 | \n",
" United States | \n",
" ... | \n",
" $28 | \n",
" 30.0 | \n",
" 45.0 | \n",
" 5/21/2022 | \n",
" 0.38 | \n",
" 4.0 | \n",
" 2.0 | \n",
" 228.0 | \n",
" Pet friendly but please confirm with me if the... | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" 1002403 | \n",
" THE VILLAGE OF HARLEM....NEW YORK ! | \n",
" 78829239556 | \n",
" NaN | \n",
" Elise | \n",
" Manhattan | \n",
" Harlem | \n",
" 40.80902 | \n",
" -73.94190 | \n",
" United States | \n",
" ... | \n",
" $124 | \n",
" 3.0 | \n",
" 0.0 | \n",
" NaN | \n",
" NaN | \n",
" 5.0 | \n",
" 1.0 | \n",
" 352.0 | \n",
" I encourage you to use my kitchen, cooking and... | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" 1002755 | \n",
" NaN | \n",
" 85098326012 | \n",
" unconfirmed | \n",
" Garry | \n",
" Brooklyn | \n",
" Clinton Hill | \n",
" 40.68514 | \n",
" -73.95976 | \n",
" United States | \n",
" ... | \n",
" $74 | \n",
" 30.0 | \n",
" 270.0 | \n",
" 7/5/2019 | \n",
" 4.64 | \n",
" 4.0 | \n",
" 1.0 | \n",
" 322.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 4 | \n",
" 1003689 | \n",
" Entire Apt: Spacious Studio/Loft by central park | \n",
" 92037596077 | \n",
" verified | \n",
" Lyndon | \n",
" Manhattan | \n",
" East Harlem | \n",
" 40.79851 | \n",
" -73.94399 | \n",
" United States | \n",
" ... | \n",
" $41 | \n",
" 10.0 | \n",
" 9.0 | \n",
" 11/19/2018 | \n",
" 0.10 | \n",
" 3.0 | \n",
" 1.0 | \n",
" 289.0 | \n",
" Please no smoking in the house, porch or on th... | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 26 columns
\n",
"
"
],
"text/plain": [
" id NAME host id \\\n",
"0 1001254 Clean & quiet apt home by the park 80014485718 \n",
"1 1002102 Skylit Midtown Castle 52335172823 \n",
"2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n",
"3 1002755 NaN 85098326012 \n",
"4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n",
"\n",
" host_identity_verified host name neighbourhood group neighbourhood \\\n",
"0 unconfirmed Madaline Brooklyn Kensington \n",
"1 verified Jenna Manhattan Midtown \n",
"2 NaN Elise Manhattan Harlem \n",
"3 unconfirmed Garry Brooklyn Clinton Hill \n",
"4 verified Lyndon Manhattan East Harlem \n",
"\n",
" lat long country ... service fee minimum nights \\\n",
"0 40.64749 -73.97237 United States ... $193 10.0 \n",
"1 40.75362 -73.98377 United States ... $28 30.0 \n",
"2 40.80902 -73.94190 United States ... $124 3.0 \n",
"3 40.68514 -73.95976 United States ... $74 30.0 \n",
"4 40.79851 -73.94399 United States ... $41 10.0 \n",
"\n",
" number of reviews last review reviews per month review rate number \\\n",
"0 9.0 10/19/2021 0.21 4.0 \n",
"1 45.0 5/21/2022 0.38 4.0 \n",
"2 0.0 NaN NaN 5.0 \n",
"3 270.0 7/5/2019 4.64 4.0 \n",
"4 9.0 11/19/2018 0.10 3.0 \n",
"\n",
" calculated host listings count availability 365 \\\n",
"0 6.0 286.0 \n",
"1 2.0 228.0 \n",
"2 1.0 352.0 \n",
"3 1.0 322.0 \n",
"4 1.0 289.0 \n",
"\n",
" house_rules license \n",
"0 Clean up and treat the home the way you'd like... NaN \n",
"1 Pet friendly but please confirm with me if the... NaN \n",
"2 I encourage you to use my kitchen, cooking and... NaN \n",
"3 NaN NaN \n",
"4 Please no smoking in the house, porch or on th... NaN \n",
"\n",
"[5 rows x 26 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the Airbnb listings\n",
"csv1.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39d543be-013a-4976-942d-f9884274c7be",
"metadata": {},
"outputs": [],
"source": [
"# Preview the last five rows of the Airbnb listings\n",
"csv1.tail()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "95f93b29-94be-4c93-9793-cf51c2ba2442",
"metadata": {},
"outputs": [],
"source": [
"# Load the Waze app reviews into a DataFrame\n",
"csv02 = pd.read_csv(filepath_or_buffer=\"WAZE_REVIEWS.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" review_id | \n",
" pseudo_author_id | \n",
" author_name | \n",
" review_text | \n",
" review_rating | \n",
" review_likes | \n",
" author_app_version | \n",
" review_timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 6caba53d-789d-4733-bad5-c7491daf80f2 | \n",
" 152618553977019693742 | \n",
" A Google user | \n",
" Nice app need to add red light cam. | \n",
" 5 | \n",
" 0 | \n",
" 0.99.2.3 | \n",
" 2009-06-30 16:48:15 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 30c15838-8b02-4dae-8f51-25905cb40b68 | \n",
" 234382942865437071667 | \n",
" A Google user | \n",
" Really cool social app. Lots of potential to b... | \n",
" 5 | \n",
" 0 | \n",
" 0.99.2.3 | \n",
" 2009-06-30 16:58:43 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" c090400e-f88f-4129-930d-a650f3163a11 | \n",
" 174473604608358796368 | \n",
" A Google user | \n",
" I was all excited about this app (ehat a great... | \n",
" 1 | \n",
" 0 | \n",
" 0.99.2.3 | \n",
" 2009-06-30 17:08:33 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3 | \n",
" f6f37456-793b-4786-af6e-454a811361bf | \n",
" 286593453219054880269 | \n",
" A Google user | \n",
" I love this app! Lol | \n",
" 5 | \n",
" 0 | \n",
" 0.99.2.3 | \n",
" 2009-06-30 17:37:22 | \n",
"
\n",
" \n",
" | 4 | \n",
" 4 | \n",
" 8ae5d962-7c0c-476d-82fa-79f6e5484acc | \n",
" 167276875678680630145 | \n",
" A Google user | \n",
" Great app i like the idea of your car being pa... | \n",
" 4 | \n",
" 0 | \n",
" 0.99.2.3 | \n",
" 2009-06-30 23:58:43 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 review_id pseudo_author_id \\\n",
"0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n",
"1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n",
"2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n",
"3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n",
"4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n",
"\n",
" author_name review_text \\\n",
"0 A Google user Nice app need to add red light cam. \n",
"1 A Google user Really cool social app. Lots of potential to b... \n",
"2 A Google user I was all excited about this app (ehat a great... \n",
"3 A Google user I love this app! Lol \n",
"4 A Google user Great app i like the idea of your car being pa... \n",
"\n",
" review_rating review_likes author_app_version review_timestamp \n",
"0 5 0 0.99.2.3 2009-06-30 16:48:15 \n",
"1 5 0 0.99.2.3 2009-06-30 16:58:43 \n",
"2 1 0 0.99.2.3 2009-06-30 17:08:33 \n",
"3 5 0 0.99.2.3 2009-06-30 17:37:22 \n",
"4 4 0 0.99.2.3 2009-06-30 23:58:43 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the Waze reviews\n",
"csv02.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" review_id | \n",
" pseudo_author_id | \n",
" author_name | \n",
" review_text | \n",
" review_rating | \n",
" review_likes | \n",
" author_app_version | \n",
" review_timestamp | \n",
"
\n",
" \n",
" \n",
" \n",
" | 780068 | \n",
" 780068 | \n",
" 01655504-5a51-4c19-b313-2bd5fa3f253a | \n",
" 680743620884748258838 | \n",
" Ma********ll | \n",
" Freezes | \n",
" 3 | \n",
" 0 | \n",
" NaN | \n",
" 2023-11-17 03:18:26 | \n",
"
\n",
" \n",
" | 780069 | \n",
" 780069 | \n",
" f04306cb-af60-4a44-aebc-c37122620319 | \n",
" 266638684561117704682 | \n",
" Zu******el | \n",
" To stuck | \n",
" 1 | \n",
" 0 | \n",
" NaN | \n",
" 2023-11-17 03:18:38 | \n",
"
\n",
" \n",
" | 780070 | \n",
" 780070 | \n",
" 894e3c41-ca20-4781-9308-70eeb060a865 | \n",
" 154572309081670894420 | \n",
" br**********ji | \n",
" racist made app | \n",
" 1 | \n",
" 0 | \n",
" 4.99.0.2 | \n",
" 2023-11-17 03:23:20 | \n",
"
\n",
" \n",
" | 780071 | \n",
" 780071 | \n",
" 4fafb0b1-485e-473e-9bcd-d5c9848424d2 | \n",
" 154995071911163107981 | \n",
" Mo***********da | \n",
" بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... | \n",
" 5 | \n",
" 0 | \n",
" 4.99.1.1 | \n",
" 2023-11-17 04:05:02 | \n",
"
\n",
" \n",
" | 780072 | \n",
" 780072 | \n",
" d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 | \n",
" 200574835524973617311 | \n",
" Re***********iz | \n",
" Best app ever used. | \n",
" 5 | \n",
" 0 | \n",
" 4.99.0.2 | \n",
" 2023-11-17 04:06:44 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 review_id \\\n",
"780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n",
"780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n",
"780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n",
"780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n",
"780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n",
"\n",
" pseudo_author_id author_name \\\n",
"780068 680743620884748258838 Ma********ll \n",
"780069 266638684561117704682 Zu******el \n",
"780070 154572309081670894420 br**********ji \n",
"780071 154995071911163107981 Mo***********da \n",
"780072 200574835524973617311 Re***********iz \n",
"\n",
" review_text review_rating \\\n",
"780068 Freezes 3 \n",
"780069 To stuck 1 \n",
"780070 racist made app 1 \n",
"780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n",
"780072 Best app ever used. 5 \n",
"\n",
" review_likes author_app_version review_timestamp \n",
"780068 0 NaN 2023-11-17 03:18:26 \n",
"780069 0 NaN 2023-11-17 03:18:38 \n",
"780070 0 4.99.0.2 2023-11-17 03:23:20 \n",
"780071 0 4.99.1.1 2023-11-17 04:05:02 \n",
"780072 0 4.99.0.2 2023-11-17 04:06:44 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the last five rows of the Waze reviews\n",
"csv02.tail(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" source | \n",
" review_id | \n",
" user_name | \n",
" review_title | \n",
" review_description | \n",
" rating | \n",
" thumbs_up | \n",
" review_date | \n",
" developer_response | \n",
" developer_response_date | \n",
" appVersion | \n",
" laguage_code | \n",
" country_code | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Google Play | \n",
" 18d6584c-d0e9-4833-a744-f607058aee97 | \n",
" Milky Way | \n",
" NaN | \n",
" Suddenly, the driver can't have my location an... | \n",
" 1 | \n",
" 0.0 | \n",
" 2023-08-10 17:48:51 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 1 | \n",
" Google Play | \n",
" 50a08f18-cece-4ddf-b617-028844c8aa28 | \n",
" Bradlee Severa | \n",
" NaN | \n",
" Very cordial.. And helped with a quick turnaro... | \n",
" 5 | \n",
" 0.0 | \n",
" 2023-08-10 17:38:35 | \n",
" NaN | \n",
" NaN | \n",
" 4.485.10000 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 2 | \n",
" Google Play | \n",
" b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 | \n",
" Amit Aggarwal | \n",
" NaN | \n",
" Very good experience | \n",
" 5 | \n",
" 0.0 | \n",
" 2023-08-10 17:38:17 | \n",
" NaN | \n",
" NaN | \n",
" 4.486.10002 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 3 | \n",
" Google Play | \n",
" 502702a9-25ed-4373-a96c-7fa1f06caacd | \n",
" Bryant Inman | \n",
" NaN | \n",
" All I use | \n",
" 5 | \n",
" 0.0 | \n",
" 2023-08-10 17:37:45 | \n",
" NaN | \n",
" NaN | \n",
" 4.467.10008 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 4 | \n",
" Google Play | \n",
" f47a3fb6-23db-49bd-9e63-f33c8d724d07 | \n",
" Addie Whittaker | \n",
" NaN | \n",
" I have enjoyed traveling by Uber my drivers ha... | \n",
" 5 | \n",
" 0.0 | \n",
" 2023-08-10 17:36:56 | \n",
" NaN | \n",
" NaN | \n",
" 4.486.10002 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" source review_id user_name \\\n",
"0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n",
"1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n",
"2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n",
"3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n",
"4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n",
"\n",
" review_title review_description rating \\\n",
"0 NaN Suddenly, the driver can't have my location an... 1 \n",
"1 NaN Very cordial.. And helped with a quick turnaro... 5 \n",
"2 NaN Very good experience 5 \n",
"3 NaN All I use 5 \n",
"4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n",
"\n",
" thumbs_up review_date developer_response developer_response_date \\\n",
"0 0.0 2023-08-10 17:48:51 NaN NaN \n",
"1 0.0 2023-08-10 17:38:35 NaN NaN \n",
"2 0.0 2023-08-10 17:38:17 NaN NaN \n",
"3 0.0 2023-08-10 17:37:45 NaN NaN \n",
"4 0.0 2023-08-10 17:36:56 NaN NaN \n",
"\n",
" appVersion laguage_code country_code \n",
"0 NaN en in \n",
"1 4.485.10000 en in \n",
"2 4.486.10002 en in \n",
"3 4.467.10008 en in \n",
"4 4.486.10002 en in "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the Uber customer reviews in a single pass and preview the first five rows\n",
"uber = pd.read_csv(filepath_or_buffer=\"Uber Customer Reviews.csv\", low_memory=False)\n",
"uber.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "d4ace5a2-346a-4099-9854-1cac2749a216",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1069616, 13)\n"
]
}
],
"source": [
"# (rows, columns) of the Uber reviews frame\n",
"print(uber.shape)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" source | \n",
" review_id | \n",
" user_name | \n",
" review_title | \n",
" review_description | \n",
" rating | \n",
" thumbs_up | \n",
" review_date | \n",
" developer_response | \n",
" developer_response_date | \n",
" appVersion | \n",
" laguage_code | \n",
" country_code | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Google Play | \n",
" fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 | \n",
" Puipuii Ralte | \n",
" NaN | \n",
" The map in Ola is so messed up, i have to pay ... | \n",
" 1 | \n",
" 0.0 | \n",
" 2023-08-10 16:40:50 | \n",
" NaN | \n",
" NaN | \n",
" 6.3.2 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 1 | \n",
" Google Play | \n",
" 5a0051fb-220a-45b2-ba94-a15a2949218f | \n",
" Deepak Kumar | \n",
" NaN | \n",
" Deepak Kumar.... 🙏🙏🙏🙏🙏] | \n",
" 5 | \n",
" 0.0 | \n",
" 2023-08-10 16:36:14 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 2 | \n",
" Google Play | \n",
" 71ebf933-b734-474d-bb65-a18c90906ed2 | \n",
" Ahamed Azarudeen | \n",
" NaN | \n",
" Such aa irresponsible app more then I waiting ... | \n",
" 1 | \n",
" 0.0 | \n",
" 2023-08-10 16:29:31 | \n",
" NaN | \n",
" NaN | \n",
" 6.3.1 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 3 | \n",
" Google Play | \n",
" e1cc0010-60b3-4126-99c2-e8549088566a | \n",
" Rahil Syed | \n",
" NaN | \n",
" Worst | \n",
" 1 | \n",
" 0.0 | \n",
" 2023-08-10 15:52:06 | \n",
" NaN | \n",
" NaN | \n",
" 5.0.4 | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
" | 4 | \n",
" Google Play | \n",
" 77cf1be1-b428-4493-ae25-e0f288f79b8f | \n",
" vin 007 | \n",
" NaN | \n",
" Too much expensive .. try UBer... They are pro... | \n",
" 1 | \n",
" 0.0 | \n",
" 2023-08-10 15:51:10 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" en | \n",
" in | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" source review_id user_name \\\n",
"0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n",
"1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n",
"2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n",
"3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n",
"4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n",
"\n",
" review_title review_description rating \\\n",
"0 NaN The map in Ola is so messed up, i have to pay ... 1 \n",
"1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n",
"2 NaN Such aa irresponsible app more then I waiting ... 1 \n",
"3 NaN Worst 1 \n",
"4 NaN Too much expensive .. try UBer... They are pro... 1 \n",
"\n",
" thumbs_up review_date developer_response developer_response_date \\\n",
"0 0.0 2023-08-10 16:40:50 NaN NaN \n",
"1 0.0 2023-08-10 16:36:14 NaN NaN \n",
"2 0.0 2023-08-10 16:29:31 NaN NaN \n",
"3 0.0 2023-08-10 15:52:06 NaN NaN \n",
"4 0.0 2023-08-10 15:51:10 NaN NaN \n",
"\n",
" appVersion laguage_code country_code \n",
"0 6.3.2 en in \n",
"1 NaN en in \n",
"2 6.3.1 en in \n",
"3 5.0.4 en in \n",
"4 NaN en in "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the Ola customer reviews in a single pass and preview the first five rows\n",
"ola_df = pd.read_csv(filepath_or_buffer=\"Ola Customer Reviews.csv\", low_memory=False)\n",
"ola_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "878a39c4-45d5-41d6-82b0-9c373c28e280",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"count 357678.000000\n",
"mean 92.402697\n",
"std 125.489169\n",
"min 1.000000\n",
"25% 8.000000\n",
"50% 33.000000\n",
"75% 131.000000\n",
"max 2877.000000\n",
"Name: review_length, dtype: float64\n"
]
}
],
"source": [
"# Character count of every Ola review, followed by its summary statistics\n",
"ola_df[\"review_length\"] = ola_df[\"review_description\"].str.len()\n",
"print(ola_df[\"review_length\"].describe())"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "1dd032ba-343b-4402-9d96-ee5e0432ab07",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Substantive reviews: 204715\n"
]
}
],
"source": [
"# Keep only Ola reviews longer than 20 characters. Rows whose review_length\n",
"# is missing (NaN) compare False against the threshold and are dropped too.\n",
"# The result gets a dataset-specific name so the Uber filter further down\n",
"# cannot silently overwrite it.\n",
"ola_substantive_reviews = ola_df[ola_df['review_length'] > 20]\n",
"print(f\"Substantive reviews: {len(ola_substantive_reviews)}\")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"count 1.069447e+06\n",
"mean 7.023987e+01\n",
"std 1.158196e+02\n",
"min 1.000000e+00\n",
"25% 8.000000e+00\n",
"50% 2.100000e+01\n",
"75% 7.800000e+01\n",
"max 3.792000e+03\n",
"Name: review_length, dtype: float64\n"
]
}
],
"source": [
"# Check average review length\n",
"uber['review_length'] = uber['review_description'].str.len()\n",
"print(uber['review_length'].describe())\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "2dd05939-e87c-443d-9012-e5f45cf64ff5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Substantive reviews: 542110\n"
]
}
],
"source": [
"# Keep only Uber reviews longer than 20 characters. Rows whose review_length\n",
"# is missing (NaN) compare False against the threshold and are dropped too.\n",
"# The result gets a dataset-specific name so it does not overwrite the\n",
"# subset produced by the Ola filter earlier in the notebook.\n",
"uber_substantive_reviews = uber[uber['review_length'] > 20]\n",
"print(f\"Substantive reviews: {len(uber_substantive_reviews)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}