Added comments and made a start on infer.py

This commit is contained in:
2026-03-28 22:31:10 +00:00
parent 753723694b
commit 0af8bff4a8
7 changed files with 301 additions and 22 deletions

View File

@@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"id": "79ac71dd",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "aa9117f0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>source</th>\n",
" <th>review_id</th>\n",
" <th>user_name</th>\n",
" <th>review_title</th>\n",
" <th>review_description</th>\n",
" <th>rating</th>\n",
" <th>thumbs_up</th>\n",
" <th>review_date</th>\n",
" <th>developer_response</th>\n",
" <th>developer_response_date</th>\n",
" <th>appVersion</th>\n",
" <th>laguage_code</th>\n",
" <th>country_code</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Google Play</td>\n",
" <td>18d6584c-d0e9-4833-a744-f607058aee97</td>\n",
" <td>Milky Way</td>\n",
" <td>NaN</td>\n",
" <td>Suddenly, the driver can't have my location an...</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>2023-08-10 17:48:51</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>en</td>\n",
" <td>in</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Google Play</td>\n",
" <td>50a08f18-cece-4ddf-b617-028844c8aa28</td>\n",
" <td>Bradlee Severa</td>\n",
" <td>NaN</td>\n",
" <td>Very cordial.. And helped with a quick turnaro...</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>2023-08-10 17:38:35</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.485.10000</td>\n",
" <td>en</td>\n",
" <td>in</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Google Play</td>\n",
" <td>b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7</td>\n",
" <td>Amit Aggarwal</td>\n",
" <td>NaN</td>\n",
" <td>Very good experience</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>2023-08-10 17:38:17</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.486.10002</td>\n",
" <td>en</td>\n",
" <td>in</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Google Play</td>\n",
" <td>502702a9-25ed-4373-a96c-7fa1f06caacd</td>\n",
" <td>Bryant Inman</td>\n",
" <td>NaN</td>\n",
" <td>All I use</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>2023-08-10 17:37:45</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.467.10008</td>\n",
" <td>en</td>\n",
" <td>in</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Google Play</td>\n",
" <td>f47a3fb6-23db-49bd-9e63-f33c8d724d07</td>\n",
" <td>Addie Whittaker</td>\n",
" <td>NaN</td>\n",
" <td>I have enjoyed traveling by Uber my drivers ha...</td>\n",
" <td>5</td>\n",
" <td>0.0</td>\n",
" <td>2023-08-10 17:36:56</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.486.10002</td>\n",
" <td>en</td>\n",
" <td>in</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" source review_id user_name \\\n",
"0 Google Play 18d6584c-d0e9-4833-a744-f607058aee97 Milky Way \n",
"1 Google Play 50a08f18-cece-4ddf-b617-028844c8aa28 Bradlee Severa \n",
"2 Google Play b0d8e75a-80a7-4dcd-abaf-72b046dbeeb7 Amit Aggarwal \n",
"3 Google Play 502702a9-25ed-4373-a96c-7fa1f06caacd Bryant Inman \n",
"4 Google Play f47a3fb6-23db-49bd-9e63-f33c8d724d07 Addie Whittaker \n",
"\n",
" review_title review_description rating \\\n",
"0 NaN Suddenly, the driver can't have my location an... 1 \n",
"1 NaN Very cordial.. And helped with a quick turnaro... 5 \n",
"2 NaN Very good experience 5 \n",
"3 NaN All I use 5 \n",
"4 NaN I have enjoyed traveling by Uber my drivers ha... 5 \n",
"\n",
" thumbs_up review_date developer_response developer_response_date \\\n",
"0 0.0 2023-08-10 17:48:51 NaN NaN \n",
"1 0.0 2023-08-10 17:38:35 NaN NaN \n",
"2 0.0 2023-08-10 17:38:17 NaN NaN \n",
"3 0.0 2023-08-10 17:37:45 NaN NaN \n",
"4 0.0 2023-08-10 17:36:56 NaN NaN \n",
"\n",
" appVersion laguage_code country_code \n",
"0 NaN en in \n",
"1 4.485.10000 en in \n",
"2 4.486.10002 en in \n",
"3 4.467.10008 en in \n",
"4 4.486.10002 en in "
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"../data/raw/uber_reviews.csv\", low_memory=False)\n",
"df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "36683790",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.DataFrame'>\n",
"RangeIndex: 1069616 entries, 0 to 1069615\n",
"Data columns (total 13 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 source 1069616 non-null str \n",
" 1 review_id 1069616 non-null str \n",
" 2 user_name 1069615 non-null str \n",
" 3 review_title 2180 non-null str \n",
" 4 review_description 1069447 non-null str \n",
" 5 rating 1069616 non-null int64 \n",
" 6 thumbs_up 1067436 non-null float64\n",
" 7 review_date 1069616 non-null str \n",
" 8 developer_response 198264 non-null str \n",
" 9 developer_response_date 197278 non-null str \n",
" 10 appVersion 828068 non-null str \n",
" 11 laguage_code 1069616 non-null str \n",
" 12 country_code 1069616 non-null str \n",
"dtypes: float64(1), int64(1), str(11)\n",
"memory usage: 106.1 MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "0b1c7f73",
"metadata": {},
"outputs": [],
"source": [
"df = df[['review_description']].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "90e6d653",
"metadata": {},
"outputs": [],
"source": [
"df.head(5)\n",
"df.to_csv(\"../data/raw/review_description.csv\", index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multitag",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}