diff --git a/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb b/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb new file mode 100644 index 0000000..910a184 --- /dev/null +++ b/.ipynb_checkpoints/datasets_reviews-checkpoint.ipynb @@ -0,0 +1,1471 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a", + "metadata": {}, + "outputs": [], + "source": [ + "csv0 = pd.read_csv(\"spotify.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c0631560-c1be-4bbf-b050-b6a552e74d63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_submittedReviewRatingTotal_thumbsupReply
02022-07-09 15:00:00Great music service, the audio is high quality...52NaN
12022-07-09 14:21:22Please ignore previous negative rating. This a...51NaN
22022-07-09 13:27:32This pop-up \"Get the best Spotify experience o...40NaN
32022-07-09 13:26:45Really buggy and terrible to use as of recently11NaN
42022-07-09 13:20:49Dear Spotify why do I get songs that I didn't ...11NaN
\n", + "
" + ], + "text/plain": [ + " Time_submitted Review \\\n", + "0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n", + "1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n", + "2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n", + "3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n", + "4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n", + "\n", + " Rating Total_thumbsup Reply \n", + "0 5 2 NaN \n", + "1 5 1 NaN \n", + "2 4 0 NaN \n", + "3 1 1 NaN \n", + "4 1 1 NaN " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv0.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Time_submittedReviewRatingTotal_thumbsupReply
615892022-01-01 03:01:29Even though it was communicated that lyrics fe...16NaN
615902022-01-01 02:13:40Use to be sooo good back when I had it, and wh...10NaN
615912022-01-01 01:02:29This app would be good if not for it taking ov...210NaN
615922022-01-01 00:49:23The app is good hard to navigate and won't jus...21NaN
615932022-01-01 00:19:09Its good but sometimes it doesnt load the musi...40NaN
\n", + "
" + ], + "text/plain": [ + " Time_submitted Review \\\n", + "61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n", + "61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n", + "61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n", + "61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n", + "61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n", + "\n", + " Rating Total_thumbsup Reply \n", + "61589 1 6 NaN \n", + "61590 1 0 NaN \n", + "61591 2 10 NaN \n", + "61592 2 1 NaN \n", + "61593 4 0 NaN " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv0.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6", + "metadata": {}, + "outputs": [], + "source": [ + "csv1 = pd.read_csv(\"Airbnb_Open_Data.csv\", low_memory=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1efba903-5004-4d7b-a1ee-42f333111055", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idNAMEhost idhost_identity_verifiedhost nameneighbourhood groupneighbourhoodlatlongcountry...service feeminimum nightsnumber of reviewslast reviewreviews per monthreview rate numbercalculated host listings countavailability 365house_ruleslicense
01001254Clean & quiet apt home by the park80014485718unconfirmedMadalineBrooklynKensington40.64749-73.97237United States...$19310.09.010/19/20210.214.06.0286.0Clean up and treat the home the way you'd like...NaN
11002102Skylit Midtown Castle52335172823verifiedJennaManhattanMidtown40.75362-73.98377United States...$2830.045.05/21/20220.384.02.0228.0Pet friendly but please confirm with me if the...NaN
21002403THE VILLAGE OF HARLEM....NEW YORK !78829239556NaNEliseManhattanHarlem40.80902-73.94190United States...$1243.00.0NaNNaN5.01.0352.0I encourage you to use my kitchen, cooking and...NaN
31002755NaN85098326012unconfirmedGarryBrooklynClinton Hill40.68514-73.95976United States...$7430.0270.07/5/20194.644.01.0322.0NaNNaN
41003689Entire Apt: Spacious Studio/Loft by central park92037596077verifiedLyndonManhattanEast Harlem40.79851-73.94399United States...$4110.09.011/19/20180.103.01.0289.0Please no smoking in the house, porch or on th...NaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " id NAME host id \\\n", + "0 1001254 Clean & quiet apt home by the park 80014485718 \n", + "1 1002102 Skylit Midtown Castle 52335172823 \n", + "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n", + "3 1002755 NaN 85098326012 \n", + "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n", + "\n", + " host_identity_verified host name neighbourhood group neighbourhood \\\n", + "0 unconfirmed Madaline Brooklyn Kensington \n", + "1 verified Jenna Manhattan Midtown \n", + "2 NaN Elise Manhattan Harlem \n", + "3 unconfirmed Garry Brooklyn Clinton Hill \n", + "4 verified Lyndon Manhattan East Harlem \n", + "\n", + " lat long country ... service fee minimum nights \\\n", + "0 40.64749 -73.97237 United States ... $193 10.0 \n", + "1 40.75362 -73.98377 United States ... $28 30.0 \n", + "2 40.80902 -73.94190 United States ... $124 3.0 \n", + "3 40.68514 -73.95976 United States ... $74 30.0 \n", + "4 40.79851 -73.94399 United States ... $41 10.0 \n", + "\n", + " number of reviews last review reviews per month review rate number \\\n", + "0 9.0 10/19/2021 0.21 4.0 \n", + "1 45.0 5/21/2022 0.38 4.0 \n", + "2 0.0 NaN NaN 5.0 \n", + "3 270.0 7/5/2019 4.64 4.0 \n", + "4 9.0 11/19/2018 0.10 3.0 \n", + "\n", + " calculated host listings count availability 365 \\\n", + "0 6.0 286.0 \n", + "1 2.0 228.0 \n", + "2 1.0 352.0 \n", + "3 1.0 322.0 \n", + "4 1.0 289.0 \n", + "\n", + " house_rules license \n", + "0 Clean up and treat the home the way you'd like... NaN \n", + "1 Pet friendly but please confirm with me if the... NaN \n", + "2 I encourage you to use my kitchen, cooking and... NaN \n", + "3 NaN NaN \n", + "4 Please no smoking in the house, porch or on th... NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "39d543be-013a-4976-942d-f9884274c7be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idNAMEhost idhost_identity_verifiedhost nameneighbourhood groupneighbourhoodlatlongcountry...service feeminimum nightsnumber of reviewslast reviewreviews per monthreview rate numbercalculated host listings countavailability 365house_ruleslicense
01001254Clean & quiet apt home by the park80014485718unconfirmedMadalineBrooklynKensington40.64749-73.97237United States...$19310.09.010/19/20210.214.06.0286.0Clean up and treat the home the way you'd like...NaN
11002102Skylit Midtown Castle52335172823verifiedJennaManhattanMidtown40.75362-73.98377United States...$2830.045.05/21/20220.384.02.0228.0Pet friendly but please confirm with me if the...NaN
21002403THE VILLAGE OF HARLEM....NEW YORK !78829239556NaNEliseManhattanHarlem40.80902-73.94190United States...$1243.00.0NaNNaN5.01.0352.0I encourage you to use my kitchen, cooking and...NaN
31002755NaN85098326012unconfirmedGarryBrooklynClinton Hill40.68514-73.95976United States...$7430.0270.07/5/20194.644.01.0322.0NaNNaN
41003689Entire Apt: Spacious Studio/Loft by central park92037596077verifiedLyndonManhattanEast Harlem40.79851-73.94399United States...$4110.09.011/19/20180.103.01.0289.0Please no smoking in the house, porch or on th...NaN
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " id NAME host id \\\n", + "0 1001254 Clean & quiet apt home by the park 80014485718 \n", + "1 1002102 Skylit Midtown Castle 52335172823 \n", + "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n", + "3 1002755 NaN 85098326012 \n", + "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n", + "\n", + " host_identity_verified host name neighbourhood group neighbourhood \\\n", + "0 unconfirmed Madaline Brooklyn Kensington \n", + "1 verified Jenna Manhattan Midtown \n", + "2 NaN Elise Manhattan Harlem \n", + "3 unconfirmed Garry Brooklyn Clinton Hill \n", + "4 verified Lyndon Manhattan East Harlem \n", + "\n", + " lat long country ... service fee minimum nights \\\n", + "0 40.64749 -73.97237 United States ... $193 10.0 \n", + "1 40.75362 -73.98377 United States ... $28 30.0 \n", + "2 40.80902 -73.94190 United States ... $124 3.0 \n", + "3 40.68514 -73.95976 United States ... $74 30.0 \n", + "4 40.79851 -73.94399 United States ... $41 10.0 \n", + "\n", + " number of reviews last review reviews per month review rate number \\\n", + "0 9.0 10/19/2021 0.21 4.0 \n", + "1 45.0 5/21/2022 0.38 4.0 \n", + "2 0.0 NaN NaN 5.0 \n", + "3 270.0 7/5/2019 4.64 4.0 \n", + "4 9.0 11/19/2018 0.10 3.0 \n", + "\n", + " calculated host listings count availability 365 \\\n", + "0 6.0 286.0 \n", + "1 2.0 228.0 \n", + "2 1.0 352.0 \n", + "3 1.0 322.0 \n", + "4 1.0 289.0 \n", + "\n", + " house_rules license \n", + "0 Clean up and treat the home the way you'd like... NaN \n", + "1 Pet friendly but please confirm with me if the... NaN \n", + "2 I encourage you to use my kitchen, cooking and... NaN \n", + "3 NaN NaN \n", + "4 Please no smoking in the house, porch or on th... NaN \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "95f93b29-94be-4c93-9793-cf51c2ba2442", + "metadata": {}, + "outputs": [], + "source": [ + "csv02 = pd.read_csv(\"WAZE_REVIEWS.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0review_idpseudo_author_idauthor_namereview_textreview_ratingreview_likesauthor_app_versionreview_timestamp
006caba53d-789d-4733-bad5-c7491daf80f2152618553977019693742A Google userNice app need to add red light cam.500.99.2.32009-06-30 16:48:15
1130c15838-8b02-4dae-8f51-25905cb40b68234382942865437071667A Google userReally cool social app. Lots of potential to b...500.99.2.32009-06-30 16:58:43
22c090400e-f88f-4129-930d-a650f3163a11174473604608358796368A Google userI was all excited about this app (ehat a great...100.99.2.32009-06-30 17:08:33
33f6f37456-793b-4786-af6e-454a811361bf286593453219054880269A Google userI love this app! Lol500.99.2.32009-06-30 17:37:22
448ae5d962-7c0c-476d-82fa-79f6e5484acc167276875678680630145A Google userGreat app i like the idea of your car being pa...400.99.2.32009-06-30 23:58:43
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 review_id pseudo_author_id \\\n", + "0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n", + "1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n", + "2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n", + "3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n", + "4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n", + "\n", + " author_name review_text \\\n", + "0 A Google user Nice app need to add red light cam. \n", + "1 A Google user Really cool social app. Lots of potential to b... \n", + "2 A Google user I was all excited about this app (ehat a great... \n", + "3 A Google user I love this app! Lol \n", + "4 A Google user Great app i like the idea of your car being pa... \n", + "\n", + " review_rating review_likes author_app_version review_timestamp \n", + "0 5 0 0.99.2.3 2009-06-30 16:48:15 \n", + "1 5 0 0.99.2.3 2009-06-30 16:58:43 \n", + "2 1 0 0.99.2.3 2009-06-30 17:08:33 \n", + "3 5 0 0.99.2.3 2009-06-30 17:37:22 \n", + "4 4 0 0.99.2.3 2009-06-30 23:58:43 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv02.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0review_idpseudo_author_idauthor_namereview_textreview_ratingreview_likesauthor_app_versionreview_timestamp
78006878006801655504-5a51-4c19-b313-2bd5fa3f253a680743620884748258838Ma********llFreezes30NaN2023-11-17 03:18:26
780069780069f04306cb-af60-4a44-aebc-c37122620319266638684561117704682Zu******elTo stuck10NaN2023-11-17 03:18:38
780070780070894e3c41-ca20-4781-9308-70eeb060a865154572309081670894420br**********jiracist made app104.99.0.22023-11-17 03:23:20
7800717800714fafb0b1-485e-473e-9bcd-d5c9848424d2154995071911163107981Mo***********daبهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...504.99.1.12023-11-17 04:05:02
780072780072d1570ba0-ffc5-4fc6-8d34-12daba4b38e2200574835524973617311Re***********izBest app ever used.504.99.0.22023-11-17 04:06:44
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 review_id \\\n", + "780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n", + "780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n", + "780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n", + "780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n", + "780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n", + "\n", + " pseudo_author_id author_name \\\n", + "780068 680743620884748258838 Ma********ll \n", + "780069 266638684561117704682 Zu******el \n", + "780070 154572309081670894420 br**********ji \n", + "780071 154995071911163107981 Mo***********da \n", + "780072 200574835524973617311 Re***********iz \n", + "\n", + " review_text review_rating \\\n", + "780068 Freezes 3 \n", + "780069 To stuck 1 \n", + "780070 racist made app 1 \n", + "780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n", + "780072 Best app ever used. 5 \n", + "\n", + " review_likes author_app_version review_timestamp \n", + "780068 0 NaN 2023-11-17 03:18:26 \n", + "780069 0 NaN 2023-11-17 03:18:38 \n", + "780070 0 4.99.0.2 2023-11-17 03:23:20 \n", + "780071 0 4.99.1.1 2023-11-17 04:05:02 \n", + "780072 0 4.99.0.2 2023-11-17 04:06:44 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv02.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sourcereview_iduser_namereview_titlereview_descriptionratingthumbs_upreview_datedeveloper_responsedeveloper_response_dateappVersionlaguage_codecountry_code
0Google Play18d6584c-d0e9-4833-a744-f607058aee97Milky WayNaNSuddenly, the driver can't have my location an...10.02023-08-10 17:48:51NaNNaNNaNenin
1Google Play50a08f18-cece-4ddf-b617-028844c8aa28Bradlee SeveraNaNVery cordial.. And helped with a quick turnaro...50.02023-08-10 17:38:35NaNNaN4.485.10000enin
2Google Playb0d8e75a-80a7-4dcd-abaf-72b046dbeeb7Amit AggarwalNaNVery good experience50.02023-08-10 17:38:17NaNNaN4.486.10002enin
3Google Play502702a9-25ed-4373-a96c-7fa1f06caacdBryant InmanNaNAll I use50.02023-08-10 17:37:45NaNNaN4.467.10008enin
4Google Playf47a3fb6-23db-49bd-9e63-f33c8d724d07Addie WhittakerNaNI have enjoyed traveling by Uber my drivers ha...50.02023-08-10 17:36:56NaNNaN4.486.10002enin
\n", + "
" + ], + "text/plain": [ + " source review_id user_name \\\n", + "0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n", + "1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n", + "2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n", + "3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n", + "4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n", + "\n", + " review_title review_description rating \\\n", + "0 NaN Suddenly, the driver can't have my location an... 1 \n", + "1 NaN Very cordial.. And helped with a quick turnaro... 5 \n", + "2 NaN Very good experience 5 \n", + "3 NaN All I use 5 \n", + "4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n", + "\n", + " thumbs_up review_date developer_response developer_response_date \\\n", + "0 0.0 2023-08-10 17:48:51 NaN NaN \n", + "1 0.0 2023-08-10 17:38:35 NaN NaN \n", + "2 0.0 2023-08-10 17:38:17 NaN NaN \n", + "3 0.0 2023-08-10 17:37:45 NaN NaN \n", + "4 0.0 2023-08-10 17:36:56 NaN NaN \n", + "\n", + " appVersion laguage_code country_code \n", + "0 NaN en in \n", + "1 4.485.10000 en in \n", + "2 4.486.10002 en in \n", + "3 4.467.10008 en in \n", + "4 4.486.10002 en in " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uber = pd.read_csv(\"Uber Customer Reviews.csv\", low_memory=False)\n", + "uber.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "d4ace5a2-346a-4099-9854-1cac2749a216", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1069616, 13)\n" + ] + } + ], + "source": [ + "print(np.shape(uber))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sourcereview_iduser_namereview_titlereview_descriptionratingthumbs_upreview_datedeveloper_responsedeveloper_response_dateappVersionlaguage_codecountry_code
0Google Playfbc7ffc9-5a89-446e-87fd-d69bf4a7f984Puipuii RalteNaNThe map in Ola is so messed up, i have to pay ...10.02023-08-10 16:40:50NaNNaN6.3.2enin
1Google Play5a0051fb-220a-45b2-ba94-a15a2949218fDeepak KumarNaNDeepak Kumar.... 🙏🙏🙏🙏🙏]50.02023-08-10 16:36:14NaNNaNNaNenin
2Google Play71ebf933-b734-474d-bb65-a18c90906ed2Ahamed AzarudeenNaNSuch aa irresponsible app more then I waiting ...10.02023-08-10 16:29:31NaNNaN6.3.1enin
3Google Playe1cc0010-60b3-4126-99c2-e8549088566aRahil SyedNaNWorst10.02023-08-10 15:52:06NaNNaN5.0.4enin
4Google Play77cf1be1-b428-4493-ae25-e0f288f79b8fvin 007NaNToo much expensive .. try UBer... They are pro...10.02023-08-10 15:51:10NaNNaNNaNenin
\n", + "
" + ], + "text/plain": [ + " source review_id user_name \\\n", + "0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n", + "1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n", + "2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n", + "3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n", + "4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n", + "\n", + " review_title review_description rating \\\n", + "0 NaN The map in Ola is so messed up, i have to pay ... 1 \n", + "1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n", + "2 NaN Such aa irresponsible app more then I waiting ... 1 \n", + "3 NaN Worst 1 \n", + "4 NaN Too much expensive .. try UBer... They are pro... 1 \n", + "\n", + " thumbs_up review_date developer_response developer_response_date \\\n", + "0 0.0 2023-08-10 16:40:50 NaN NaN \n", + "1 0.0 2023-08-10 16:36:14 NaN NaN \n", + "2 0.0 2023-08-10 16:29:31 NaN NaN \n", + "3 0.0 2023-08-10 15:52:06 NaN NaN \n", + "4 0.0 2023-08-10 15:51:10 NaN NaN \n", + "\n", + " appVersion laguage_code country_code \n", + "0 6.3.2 en in \n", + "1 NaN en in \n", + "2 6.3.1 en in \n", + "3 5.0.4 en in \n", + "4 NaN en in " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ola_df = pd.read_csv(\"Ola Customer Reviews.csv\", low_memory=False)\n", + "ola_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "878a39c4-45d5-41d6-82b0-9c373c28e280", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "count 357678.000000\n", + "mean 92.402697\n", + "std 125.489169\n", + "min 1.000000\n", + "25% 8.000000\n", + "50% 33.000000\n", + "75% 131.000000\n", + "max 2877.000000\n", + "Name: review_length, dtype: float64\n" + ] + } + ], + "source": [ + "# Check average review length\n", + "ola_df['review_length'] = ola_df['review_description'].str.len()\n", + "print(ola_df['review_length'].describe())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "1dd032ba-343b-4402-9d96-ee5e0432ab07", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Substantive reviews: 204715\n" + ] + } + ], + "source": [ + "# Filter out very short reviews\n", + "substantive_reviews = ola_df[ola_df['review_length'] > 20]\n", + "print(f\"Substantive reviews: {len(substantive_reviews)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "count 1.069447e+06\n", + "mean 7.023987e+01\n", + "std 1.158196e+02\n", + "min 1.000000e+00\n", + "25% 8.000000e+00\n", + "50% 2.100000e+01\n", + "75% 7.800000e+01\n", + "max 3.792000e+03\n", + "Name: review_length, dtype: float64\n" + ] + } + ], + "source": [ + "# Check average review length\n", + "uber['review_length'] = uber['review_description'].str.len()\n", + "print(uber['review_length'].describe())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "2dd05939-e87c-443d-9012-e5f45cf64ff5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Substantive reviews: 542110\n" + ] + } + ], + "source": [ + "# Filter out very short reviews\n", + "substantive_reviews = uber[uber['review_length'] > 20]\n", + "print(f\"Substantive reviews: {len(substantive_reviews)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/datasets_reviews.ipynb b/datasets_reviews.ipynb index 910a184..14f23a8 100644 --- a/datasets_reviews.ipynb +++ b/datasets_reviews.ipynb @@ -2,996 +2,40 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", - "import pandas as pd" + "import pandas as pd\n", + "from pathlib import Path" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "7c97ff6e-05a0-4ed1-945a-04f024b3045a", + "execution_count": 11, + "id": "0c897ead-dfb5-4d18-bcfc-949824a0868f", "metadata": {}, "outputs": [], "source": [ - "csv0 = pd.read_csv(\"spotify.csv\")" + "uber = Path.home() / 'google-drive' / 'Charlie_6013_RECLASS' / 'Data' / 'Raw' / 'Uber Customer Reviews.csv'" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "c0631560-c1be-4bbf-b050-b6a552e74d63", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Time_submittedReviewRatingTotal_thumbsupReply
02022-07-09 15:00:00Great music service, the audio is high quality...52NaN
12022-07-09 14:21:22Please ignore previous negative rating. This a...51NaN
22022-07-09 13:27:32This pop-up \"Get the best Spotify experience o...40NaN
32022-07-09 13:26:45Really buggy and terrible to use as of recently11NaN
42022-07-09 13:20:49Dear Spotify why do I get songs that I didn't ...11NaN
\n", - "
" - ], - "text/plain": [ - " Time_submitted Review \\\n", - "0 2022-07-09 15:00:00 Great music service, the audio is high quality... \n", - "1 2022-07-09 14:21:22 Please ignore previous negative rating. This a... \n", - "2 2022-07-09 13:27:32 This pop-up \"Get the best Spotify experience o... \n", - "3 2022-07-09 13:26:45 Really buggy and terrible to use as of recently \n", - "4 2022-07-09 13:20:49 Dear Spotify why do I get songs that I didn't ... \n", - "\n", - " Rating Total_thumbsup Reply \n", - "0 5 2 NaN \n", - "1 5 1 NaN \n", - "2 4 0 NaN \n", - "3 1 1 NaN \n", - "4 1 1 NaN " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv0.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "bd769aee-cbe3-4237-b420-4c3bcd8eec73", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Time_submittedReviewRatingTotal_thumbsupReply
615892022-01-01 03:01:29Even though it was communicated that lyrics fe...16NaN
615902022-01-01 02:13:40Use to be sooo good back when I had it, and wh...10NaN
615912022-01-01 01:02:29This app would be good if not for it taking ov...210NaN
615922022-01-01 00:49:23The app is good hard to navigate and won't jus...21NaN
615932022-01-01 00:19:09Its good but sometimes it doesnt load the musi...40NaN
\n", - "
" - ], - "text/plain": [ - " Time_submitted Review \\\n", - "61589 2022-01-01 03:01:29 Even though it was communicated that lyrics fe... \n", - "61590 2022-01-01 02:13:40 Use to be sooo good back when I had it, and wh... \n", - "61591 2022-01-01 01:02:29 This app would be good if not for it taking ov... \n", - "61592 2022-01-01 00:49:23 The app is good hard to navigate and won't jus... \n", - "61593 2022-01-01 00:19:09 Its good but sometimes it doesnt load the musi... \n", - "\n", - " Rating Total_thumbsup Reply \n", - "61589 1 6 NaN \n", - "61590 1 0 NaN \n", - "61591 2 10 NaN \n", - "61592 2 1 NaN \n", - "61593 4 0 NaN " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv0.tail()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "4e1fd6d9-df1e-4615-aae2-203559d51cd6", + "execution_count": 12, + "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01", "metadata": {}, "outputs": [], "source": [ - "csv1 = pd.read_csv(\"Airbnb_Open_Data.csv\", low_memory=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "1efba903-5004-4d7b-a1ee-42f333111055", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idNAMEhost idhost_identity_verifiedhost nameneighbourhood groupneighbourhoodlatlongcountry...service feeminimum nightsnumber of reviewslast reviewreviews per monthreview rate numbercalculated host listings countavailability 365house_ruleslicense
01001254Clean & quiet apt home by the park80014485718unconfirmedMadalineBrooklynKensington40.64749-73.97237United States...$19310.09.010/19/20210.214.06.0286.0Clean up and treat the home the way you'd like...NaN
11002102Skylit Midtown Castle52335172823verifiedJennaManhattanMidtown40.75362-73.98377United States...$2830.045.05/21/20220.384.02.0228.0Pet friendly but please confirm with me if the...NaN
21002403THE VILLAGE OF HARLEM....NEW YORK !78829239556NaNEliseManhattanHarlem40.80902-73.94190United States...$1243.00.0NaNNaN5.01.0352.0I encourage you to use my kitchen, cooking and...NaN
31002755NaN85098326012unconfirmedGarryBrooklynClinton Hill40.68514-73.95976United States...$7430.0270.07/5/20194.644.01.0322.0NaNNaN
41003689Entire Apt: Spacious Studio/Loft by central park92037596077verifiedLyndonManhattanEast Harlem40.79851-73.94399United States...$4110.09.011/19/20180.103.01.0289.0Please no smoking in the house, porch or on th...NaN
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " id NAME host id \\\n", - "0 1001254 Clean & quiet apt home by the park 80014485718 \n", - "1 1002102 Skylit Midtown Castle 52335172823 \n", - "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n", - "3 1002755 NaN 85098326012 \n", - "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n", - "\n", - " host_identity_verified host name neighbourhood group neighbourhood \\\n", - "0 unconfirmed Madaline Brooklyn Kensington \n", - "1 verified Jenna Manhattan Midtown \n", - "2 NaN Elise Manhattan Harlem \n", - "3 unconfirmed Garry Brooklyn Clinton Hill \n", - "4 verified Lyndon Manhattan East Harlem \n", - "\n", - " lat long country ... service fee minimum nights \\\n", - "0 40.64749 -73.97237 United States ... $193 10.0 \n", - "1 40.75362 -73.98377 United States ... $28 30.0 \n", - "2 40.80902 -73.94190 United States ... $124 3.0 \n", - "3 40.68514 -73.95976 United States ... $74 30.0 \n", - "4 40.79851 -73.94399 United States ... $41 10.0 \n", - "\n", - " number of reviews last review reviews per month review rate number \\\n", - "0 9.0 10/19/2021 0.21 4.0 \n", - "1 45.0 5/21/2022 0.38 4.0 \n", - "2 0.0 NaN NaN 5.0 \n", - "3 270.0 7/5/2019 4.64 4.0 \n", - "4 9.0 11/19/2018 0.10 3.0 \n", - "\n", - " calculated host listings count availability 365 \\\n", - "0 6.0 286.0 \n", - "1 2.0 228.0 \n", - "2 1.0 352.0 \n", - "3 1.0 322.0 \n", - "4 1.0 289.0 \n", - "\n", - " house_rules license \n", - "0 Clean up and treat the home the way you'd like... NaN \n", - "1 Pet friendly but please confirm with me if the... NaN \n", - "2 I encourage you to use my kitchen, cooking and... NaN \n", - "3 NaN NaN \n", - "4 Please no smoking in the house, porch or on th... NaN \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv1.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "39d543be-013a-4976-942d-f9884274c7be", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idNAMEhost idhost_identity_verifiedhost nameneighbourhood groupneighbourhoodlatlongcountry...service feeminimum nightsnumber of reviewslast reviewreviews per monthreview rate numbercalculated host listings countavailability 365house_ruleslicense
01001254Clean & quiet apt home by the park80014485718unconfirmedMadalineBrooklynKensington40.64749-73.97237United States...$19310.09.010/19/20210.214.06.0286.0Clean up and treat the home the way you'd like...NaN
11002102Skylit Midtown Castle52335172823verifiedJennaManhattanMidtown40.75362-73.98377United States...$2830.045.05/21/20220.384.02.0228.0Pet friendly but please confirm with me if the...NaN
21002403THE VILLAGE OF HARLEM....NEW YORK !78829239556NaNEliseManhattanHarlem40.80902-73.94190United States...$1243.00.0NaNNaN5.01.0352.0I encourage you to use my kitchen, cooking and...NaN
31002755NaN85098326012unconfirmedGarryBrooklynClinton Hill40.68514-73.95976United States...$7430.0270.07/5/20194.644.01.0322.0NaNNaN
41003689Entire Apt: Spacious Studio/Loft by central park92037596077verifiedLyndonManhattanEast Harlem40.79851-73.94399United States...$4110.09.011/19/20180.103.01.0289.0Please no smoking in the house, porch or on th...NaN
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " id NAME host id \\\n", - "0 1001254 Clean & quiet apt home by the park 80014485718 \n", - "1 1002102 Skylit Midtown Castle 52335172823 \n", - "2 1002403 THE VILLAGE OF HARLEM....NEW YORK ! 78829239556 \n", - "3 1002755 NaN 85098326012 \n", - "4 1003689 Entire Apt: Spacious Studio/Loft by central park 92037596077 \n", - "\n", - " host_identity_verified host name neighbourhood group neighbourhood \\\n", - "0 unconfirmed Madaline Brooklyn Kensington \n", - "1 verified Jenna Manhattan Midtown \n", - "2 NaN Elise Manhattan Harlem \n", - "3 unconfirmed Garry Brooklyn Clinton Hill \n", - "4 verified Lyndon Manhattan East Harlem \n", - "\n", - " lat long country ... service fee minimum nights \\\n", - "0 40.64749 -73.97237 United States ... $193 10.0 \n", - "1 40.75362 -73.98377 United States ... $28 30.0 \n", - "2 40.80902 -73.94190 United States ... $124 3.0 \n", - "3 40.68514 -73.95976 United States ... $74 30.0 \n", - "4 40.79851 -73.94399 United States ... $41 10.0 \n", - "\n", - " number of reviews last review reviews per month review rate number \\\n", - "0 9.0 10/19/2021 0.21 4.0 \n", - "1 45.0 5/21/2022 0.38 4.0 \n", - "2 0.0 NaN NaN 5.0 \n", - "3 270.0 7/5/2019 4.64 4.0 \n", - "4 9.0 11/19/2018 0.10 3.0 \n", - "\n", - " calculated host listings count availability 365 \\\n", - "0 6.0 286.0 \n", - "1 2.0 228.0 \n", - "2 1.0 352.0 \n", - "3 1.0 322.0 \n", - "4 1.0 289.0 \n", - "\n", - " house_rules license \n", - "0 Clean up and treat the home the way you'd like... NaN \n", - "1 Pet friendly but please confirm with me if the... NaN \n", - "2 I encourage you to use my kitchen, cooking and... NaN \n", - "3 NaN NaN \n", - "4 Please no smoking in the house, porch or on th... NaN \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv1.head()" + "uber_df = pd.read_csv(uber, low_memory=False)" ] }, { "cell_type": "code", "execution_count": 13, - "id": "95f93b29-94be-4c93-9793-cf51c2ba2442", - "metadata": {}, - "outputs": [], - "source": [ - "csv02 = pd.read_csv(\"WAZE_REVIEWS.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "7f8b10d2-6225-47d8-82b5-b8041ee6412b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0review_idpseudo_author_idauthor_namereview_textreview_ratingreview_likesauthor_app_versionreview_timestamp
006caba53d-789d-4733-bad5-c7491daf80f2152618553977019693742A Google userNice app need to add red light cam.500.99.2.32009-06-30 16:48:15
1130c15838-8b02-4dae-8f51-25905cb40b68234382942865437071667A Google userReally cool social app. Lots of potential to b...500.99.2.32009-06-30 16:58:43
22c090400e-f88f-4129-930d-a650f3163a11174473604608358796368A Google userI was all excited about this app (ehat a great...100.99.2.32009-06-30 17:08:33
33f6f37456-793b-4786-af6e-454a811361bf286593453219054880269A Google userI love this app! Lol500.99.2.32009-06-30 17:37:22
448ae5d962-7c0c-476d-82fa-79f6e5484acc167276875678680630145A Google userGreat app i like the idea of your car being pa...400.99.2.32009-06-30 23:58:43
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 review_id pseudo_author_id \\\n", - "0 0 6caba53d-789d-4733-bad5-c7491daf80f2 152618553977019693742 \n", - "1 1 30c15838-8b02-4dae-8f51-25905cb40b68 234382942865437071667 \n", - "2 2 c090400e-f88f-4129-930d-a650f3163a11 174473604608358796368 \n", - "3 3 f6f37456-793b-4786-af6e-454a811361bf 286593453219054880269 \n", - "4 4 8ae5d962-7c0c-476d-82fa-79f6e5484acc 167276875678680630145 \n", - "\n", - " author_name review_text \\\n", - "0 A Google user Nice app need to add red light cam. \n", - "1 A Google user Really cool social app. Lots of potential to b... \n", - "2 A Google user I was all excited about this app (ehat a great... \n", - "3 A Google user I love this app! Lol \n", - "4 A Google user Great app i like the idea of your car being pa... \n", - "\n", - " review_rating review_likes author_app_version review_timestamp \n", - "0 5 0 0.99.2.3 2009-06-30 16:48:15 \n", - "1 5 0 0.99.2.3 2009-06-30 16:58:43 \n", - "2 1 0 0.99.2.3 2009-06-30 17:08:33 \n", - "3 5 0 0.99.2.3 2009-06-30 17:37:22 \n", - "4 4 0 0.99.2.3 2009-06-30 23:58:43 " - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv02.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "39b1151a-655a-4191-8fcb-2ff1b40e5edf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0review_idpseudo_author_idauthor_namereview_textreview_ratingreview_likesauthor_app_versionreview_timestamp
78006878006801655504-5a51-4c19-b313-2bd5fa3f253a680743620884748258838Ma********llFreezes30NaN2023-11-17 03:18:26
780069780069f04306cb-af60-4a44-aebc-c37122620319266638684561117704682Zu******elTo stuck10NaN2023-11-17 03:18:38
780070780070894e3c41-ca20-4781-9308-70eeb060a865154572309081670894420br**********jiracist made app104.99.0.22023-11-17 03:23:20
7800717800714fafb0b1-485e-473e-9bcd-d5c9848424d2154995071911163107981Mo***********daبهترین مثل همیشه.با آی پی ثابت های کانال تلگرا...504.99.1.12023-11-17 04:05:02
780072780072d1570ba0-ffc5-4fc6-8d34-12daba4b38e2200574835524973617311Re***********izBest app ever used.504.99.0.22023-11-17 04:06:44
\n", - "
" - ], - "text/plain": [ - " Unnamed: 0 review_id \\\n", - "780068 780068 01655504-5a51-4c19-b313-2bd5fa3f253a \n", - "780069 780069 f04306cb-af60-4a44-aebc-c37122620319 \n", - "780070 780070 894e3c41-ca20-4781-9308-70eeb060a865 \n", - "780071 780071 4fafb0b1-485e-473e-9bcd-d5c9848424d2 \n", - "780072 780072 d1570ba0-ffc5-4fc6-8d34-12daba4b38e2 \n", - "\n", - " pseudo_author_id author_name \\\n", - "780068 680743620884748258838 Ma********ll \n", - "780069 266638684561117704682 Zu******el \n", - "780070 154572309081670894420 br**********ji \n", - "780071 154995071911163107981 Mo***********da \n", - "780072 200574835524973617311 Re***********iz \n", - "\n", - " review_text review_rating \\\n", - "780068 Freezes 3 \n", - "780069 To stuck 1 \n", - "780070 racist made app 1 \n", - "780071 بهترین مثل همیشه.با آی پی ثابت های کانال تلگرا... 5 \n", - "780072 Best app ever used. 5 \n", - "\n", - " review_likes author_app_version review_timestamp \n", - "780068 0 NaN 2023-11-17 03:18:26 \n", - "780069 0 NaN 2023-11-17 03:18:38 \n", - "780070 0 4.99.0.2 2023-11-17 03:23:20 \n", - "780071 0 4.99.1.1 2023-11-17 04:05:02 \n", - "780072 0 4.99.0.2 2023-11-17 04:06:44 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "csv02.tail()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "2fc95472-e0ae-45f2-86fd-4aa023239c0d", + "id": "9b8469b3-c606-461f-aaef-9619b7dc1ffd", "metadata": {}, "outputs": [ { @@ -1145,303 +189,141 @@ "4 4.486.10002 en in " ] }, - "execution_count": 20, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "uber = pd.read_csv(\"Uber Customer Reviews.csv\", low_memory=False)\n", - "uber.head()" + "uber_df.head()" ] }, { "cell_type": "code", - "execution_count": 24, - "id": "d4ace5a2-346a-4099-9854-1cac2749a216", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1069616, 13)\n" - ] - } - ], - "source": [ - "print(np.shape(uber))" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "ad7ac03d-a9df-4688-ad3c-8e354996f52c", + "execution_count": 15, + "id": "1709a2cc-4f7a-4e77-994e-68668612caff", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
sourcereview_iduser_namereview_titlereview_descriptionratingthumbs_upreview_datedeveloper_responsedeveloper_response_dateappVersionlaguage_codecountry_code
0Google Playfbc7ffc9-5a89-446e-87fd-d69bf4a7f984Puipuii RalteNaNThe map in Ola is so messed up, i have to pay ...10.02023-08-10 16:40:50NaNNaN6.3.2enin
1Google Play5a0051fb-220a-45b2-ba94-a15a2949218fDeepak KumarNaNDeepak Kumar.... 🙏🙏🙏🙏🙏]50.02023-08-10 16:36:14NaNNaNNaNenin
2Google Play71ebf933-b734-474d-bb65-a18c90906ed2Ahamed AzarudeenNaNSuch aa irresponsible app more then I waiting ...10.02023-08-10 16:29:31NaNNaN6.3.1enin
3Google Playe1cc0010-60b3-4126-99c2-e8549088566aRahil SyedNaNWorst10.02023-08-10 15:52:06NaNNaN5.0.4enin
4Google Play77cf1be1-b428-4493-ae25-e0f288f79b8fvin 007NaNToo much expensive .. try UBer... They are pro...10.02023-08-10 15:51:10NaNNaNNaNenin
\n", - "
" - ], "text/plain": [ - " source review_id user_name \\\n", - "0 Google Play fbc7ffc9-5a89-446e-87fd-d69bf4a7f984 Puipuii Ralte \n", - "1 Google Play 5a0051fb-220a-45b2-ba94-a15a2949218f Deepak Kumar \n", - "2 Google Play 71ebf933-b734-474d-bb65-a18c90906ed2 Ahamed Azarudeen \n", - "3 Google Play e1cc0010-60b3-4126-99c2-e8549088566a Rahil Syed \n", - "4 Google Play 77cf1be1-b428-4493-ae25-e0f288f79b8f vin 007 \n", - "\n", - " review_title review_description rating \\\n", - "0 NaN The map in Ola is so messed up, i have to pay ... 1 \n", - "1 NaN Deepak Kumar.... 🙏🙏🙏🙏🙏] 5 \n", - "2 NaN Such aa irresponsible app more then I waiting ... 1 \n", - "3 NaN Worst 1 \n", - "4 NaN Too much expensive .. try UBer... They are pro... 1 \n", - "\n", - " thumbs_up review_date developer_response developer_response_date \\\n", - "0 0.0 2023-08-10 16:40:50 NaN NaN \n", - "1 0.0 2023-08-10 16:36:14 NaN NaN \n", - "2 0.0 2023-08-10 16:29:31 NaN NaN \n", - "3 0.0 2023-08-10 15:52:06 NaN NaN \n", - "4 0.0 2023-08-10 15:51:10 NaN NaN \n", - "\n", - " appVersion laguage_code country_code \n", - "0 6.3.2 en in \n", - "1 NaN en in \n", - "2 6.3.1 en in \n", - "3 5.0.4 en in \n", - "4 NaN en in " + "(1069616, 13)" ] }, - "execution_count": 27, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ola_df = pd.read_csv(\"Ola Customer Reviews.csv\", low_memory=False)\n", - "ola_df.head()" + "np.shape(uber_df)" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "878a39c4-45d5-41d6-82b0-9c373c28e280", + "execution_count": 17, + "id": "06c0c03c-14ba-4451-a6ea-44d36e85327c", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "count 357678.000000\n", - "mean 92.402697\n", - "std 125.489169\n", - "min 1.000000\n", - "25% 8.000000\n", - "50% 33.000000\n", - "75% 131.000000\n", - "max 2877.000000\n", - "Name: review_length, dtype: float64\n" - ] + "data": { + "text/plain": [ + "['source',\n", + " 'review_id',\n", + " 'user_name',\n", + " 'review_title',\n", + " 'review_description',\n", + " 'rating',\n", + " 'thumbs_up',\n", + " 'review_date',\n", + " 'developer_response',\n", + " 'developer_response_date',\n", + " 'appVersion',\n", + " 'laguage_code',\n", + " 'country_code']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Check average review length\n", - "ola_df['review_length'] = ola_df['review_description'].str.len()\n", - "print(ola_df['review_length'].describe())\n", - "\n" + "uber_df.columns.tolist()" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "1dd032ba-343b-4402-9d96-ee5e0432ab07", + "execution_count": 19, + "id": "d22d3bce-eac0-4d02-a4ef-38343f4958ff", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Substantive reviews: 204715\n" - ] + "data": { + "text/plain": [ + "source object\n", + "review_id object\n", + "user_name object\n", + "review_title object\n", + "review_description object\n", + "rating int64\n", + "thumbs_up float64\n", + "review_date object\n", + "developer_response object\n", + "developer_response_date object\n", + "appVersion object\n", + "laguage_code object\n", + "country_code object\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Filter out very short reviews\n", - "substantive_reviews = ola_df[ola_df['review_length'] > 20]\n", - "print(f\"Substantive reviews: {len(substantive_reviews)}\")" + "uber_df.dtypes" ] }, { "cell_type": "code", - "execution_count": 30, - "id": "2e58bf99-c08e-4e41-9b98-124b3f9e6145", + "execution_count": 22, + "id": "e08f5eae-7921-4526-b8fd-29038c55e1bb", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "count 1.069447e+06\n", - "mean 7.023987e+01\n", - "std 1.158196e+02\n", - "min 1.000000e+00\n", - "25% 8.000000e+00\n", - "50% 2.100000e+01\n", - "75% 7.800000e+01\n", - "max 3.792000e+03\n", - "Name: review_length, dtype: float64\n" - ] + "data": { + "text/plain": [ + "source 0\n", + "review_id 0\n", + "user_name 1\n", + "review_title 1067436\n", + "review_description 169\n", + "rating 0\n", + "thumbs_up 2180\n", + "review_date 0\n", + "developer_response 871352\n", + "developer_response_date 872338\n", + "appVersion 241548\n", + "laguage_code 0\n", + "country_code 0\n", + "dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "# Check average review length\n", - "uber['review_length'] = uber['review_description'].str.len()\n", - "print(uber['review_length'].describe())\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "2dd05939-e87c-443d-9012-e5f45cf64ff5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Substantive reviews: 542110\n" - ] - } - ], - "source": [ - "# Filter out very short reviews\n", - "substantive_reviews = uber[uber['review_length'] > 20]\n", - "print(f\"Substantive reviews: {len(substantive_reviews)}\")" + "uber_df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": null, - "id": "75ad8e81-3f11-4152-9494-b95bbba6fa01", + "id": "ea59d211-9958-46f6-bf76-65d8d36c50e4", "metadata": {}, "outputs": [], "source": []