354 lines
11 KiB
Plaintext
354 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"from pathlib import Path"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "0c897ead-dfb5-4d18-bcfc-949824a0868f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"uber = Path.home().joinpath('google-drive', 'Charlie_6013_RECLASS', 'Data', 'Raw', 'Uber Customer Reviews.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"uber_df = pd.read_csv(uber, low_memory=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "9b8469b3-c606-461f-aaef-9619b7dc1ffd",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>review_id</th>\n",
|
|
" <th>user_name</th>\n",
|
|
" <th>review_title</th>\n",
|
|
" <th>review_description</th>\n",
|
|
" <th>rating</th>\n",
|
|
" <th>thumbs_up</th>\n",
|
|
" <th>review_date</th>\n",
|
|
" <th>developer_response</th>\n",
|
|
" <th>developer_response_date</th>\n",
|
|
" <th>appVersion</th>\n",
|
|
" <th>laguage_code</th>\n",
|
|
" <th>country_code</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Google Play</td>\n",
|
|
" <td>18d6584c-d0e9-4833-a744-f607058aee97</td>\n",
|
|
" <td>Milky Way</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>Suddenly, the driver can't have my location an...</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2023-08-10 17:48:51</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>in</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Google Play</td>\n",
|
|
" <td>50a08f18-cece-4ddf-b617-028844c8aa28</td>\n",
|
|
" <td>Bradlee Severa</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>Very cordial.. And helped with a quick turnaro...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2023-08-10 17:38:35</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4.485.10000</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>in</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Google Play</td>\n",
|
|
" <td>b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7</td>\n",
|
|
" <td>Amit Aggarwal</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>Very good experience</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2023-08-10 17:38:17</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4.486.10002</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>in</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Google Play</td>\n",
|
|
" <td>502702a9-25ed-4373-a96c-7fa1f06caacd</td>\n",
|
|
" <td>Bryant Inman</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>All I use</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2023-08-10 17:37:45</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4.467.10008</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>in</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Google Play</td>\n",
|
|
" <td>f47a3fb6-23db-49bd-9e63-f33c8d724d07</td>\n",
|
|
" <td>Addie Whittaker</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>I have enjoyed traveling by Uber my drivers ha...</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2023-08-10 17:36:56</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4.486.10002</td>\n",
|
|
" <td>en</td>\n",
|
|
" <td>in</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" source review_id user_name \\\n",
|
|
"0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n",
|
|
"1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n",
|
|
"2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n",
|
|
"3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n",
|
|
"4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n",
|
|
"\n",
|
|
" review_title review_description rating \\\n",
|
|
"0 NaN Suddenly, the driver can't have my location an... 1 \n",
|
|
"1 NaN Very cordial.. And helped with a quick turnaro... 5 \n",
|
|
"2 NaN Very good experience 5 \n",
|
|
"3 NaN All I use 5 \n",
|
|
"4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n",
|
|
"\n",
|
|
" thumbs_up review_date developer_response developer_response_date \\\n",
|
|
"0 0.0 2023-08-10 17:48:51 NaN NaN \n",
|
|
"1 0.0 2023-08-10 17:38:35 NaN NaN \n",
|
|
"2 0.0 2023-08-10 17:38:17 NaN NaN \n",
|
|
"3 0.0 2023-08-10 17:37:45 NaN NaN \n",
|
|
"4 0.0 2023-08-10 17:36:56 NaN NaN \n",
|
|
"\n",
|
|
" appVersion laguage_code country_code \n",
|
|
"0 NaN en in \n",
|
|
"1 4.485.10000 en in \n",
|
|
"2 4.486.10002 en in \n",
|
|
"3 4.467.10008 en in \n",
|
|
"4 4.486.10002 en in "
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"uber_df.head(n=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "1709a2cc-4f7a-4e77-994e-68668612caff",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(1069616, 13)"
|
|
]
|
|
},
|
|
"execution_count": 15,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"uber_df.shape  # (n_rows, n_columns) of the loaded reviews frame"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"id": "06c0c03c-14ba-4451-a6ea-44d36e85327c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['source',\n",
|
|
" 'review_id',\n",
|
|
" 'user_name',\n",
|
|
" 'review_title',\n",
|
|
" 'review_description',\n",
|
|
" 'rating',\n",
|
|
" 'thumbs_up',\n",
|
|
" 'review_date',\n",
|
|
" 'developer_response',\n",
|
|
" 'developer_response_date',\n",
|
|
" 'appVersion',\n",
|
|
" 'laguage_code',\n",
|
|
" 'country_code']"
|
|
]
|
|
},
|
|
"execution_count": 17,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"list(uber_df.columns)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "d22d3bce-eac0-4d02-a4ef-38343f4958ff",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"source object\n",
|
|
"review_id object\n",
|
|
"user_name object\n",
|
|
"review_title object\n",
|
|
"review_description object\n",
|
|
"rating int64\n",
|
|
"thumbs_up float64\n",
|
|
"review_date object\n",
|
|
"developer_response object\n",
|
|
"developer_response_date object\n",
|
|
"appVersion object\n",
|
|
"laguage_code object\n",
|
|
"country_code object\n",
|
|
"dtype: object"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"uber_df.dtypes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "e08f5eae-7921-4526-b8fd-29038c55e1bb",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"source 0\n",
|
|
"review_id 0\n",
|
|
"user_name 1\n",
|
|
"review_title 1067436\n",
|
|
"review_description 169\n",
|
|
"rating 0\n",
|
|
"thumbs_up 2180\n",
|
|
"review_date 0\n",
|
|
"developer_response 871352\n",
|
|
"developer_response_date 872338\n",
|
|
"appVersion 241548\n",
|
|
"laguage_code 0\n",
|
|
"country_code 0\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 22,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"uber_df.isna().sum()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ea59d211-9958-46f6-bf76-65d8d36c50e4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.13"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|