Moving on to multitag.py, sampling complete I think
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 7,
|
||||||
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
|
"id": "f3da59fb-eb6b-449f-b8d5-95ddacd456f2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 8,
|
||||||
"id": "0c897ead-dfb5-4d18-bcfc-949824a0868f",
|
"id": "0c897ead-dfb5-4d18-bcfc-949824a0868f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -24,17 +24,34 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 9,
|
||||||
"id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
|
"id": "75ad8e81-3f11-4152-9494-b95bbba6fa01",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"ename": "FileNotFoundError",
|
||||||
|
"evalue": "[Errno 2] No such file or directory: 'C:\\\\Users\\\\ch\\\\google-drive\\\\Charlie_6013_RECLASS\\\\Data\\\\Raw\\\\Uber Customer Reviews.csv'",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
|
"\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)",
|
||||||
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[9]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m uber_df = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[43muber\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlow_memory\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\multitag\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\multitag\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\multitag\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\multitag\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n",
|
||||||
|
"\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\multitag\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n",
|
||||||
|
"\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: 'C:\\\\Users\\\\ch\\\\google-drive\\\\Charlie_6013_RECLASS\\\\Data\\\\Raw\\\\Uber Customer Reviews.csv'"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"uber_df = pd.read_csv(uber, low_memory=False)"
|
"uber_df = pd.read_csv(uber, low_memory=False)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": null,
|
||||||
"id": "9b8469b3-c606-461f-aaef-9619b7dc1ffd",
|
"id": "9b8469b3-c606-461f-aaef-9619b7dc1ffd",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -200,7 +217,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": null,
|
||||||
"id": "1709a2cc-4f7a-4e77-994e-68668612caff",
|
"id": "1709a2cc-4f7a-4e77-994e-68668612caff",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -221,7 +238,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": null,
|
||||||
"id": "06c0c03c-14ba-4451-a6ea-44d36e85327c",
|
"id": "06c0c03c-14ba-4451-a6ea-44d36e85327c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -254,7 +271,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": null,
|
||||||
"id": "d22d3bce-eac0-4d02-a4ef-38343f4958ff",
|
"id": "d22d3bce-eac0-4d02-a4ef-38343f4958ff",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -288,7 +305,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": null,
|
||||||
"id": "e08f5eae-7921-4526-b8fd-29038c55e1bb",
|
"id": "e08f5eae-7921-4526-b8fd-29038c55e1bb",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -345,7 +362,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.11.13"
|
"version": "3.14.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 11,
|
||||||
"id": "470fe7c6-1614-4daf-879f-e6c399117c7b",
|
"id": "470fe7c6-1614-4daf-879f-e6c399117c7b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 12,
|
||||||
"id": "b855045e-2dd1-4fa1-ab5a-8ce8b50b02ee",
|
"id": "b855045e-2dd1-4fa1-ab5a-8ce8b50b02ee",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -23,7 +23,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 13,
|
||||||
"id": "e7da1fb6-ede6-46c6-8fbd-fa491d3351c5",
|
"id": "e7da1fb6-ede6-46c6-8fbd-fa491d3351c5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -178,7 +178,7 @@
|
|||||||
"4 4.486.10002 en in "
|
"4 4.486.10002 en in "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 24,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -189,7 +189,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 14,
|
||||||
"id": "5c02ec54-4583-4720-88c6-1110b52c3f88",
|
"id": "5c02ec54-4583-4720-88c6-1110b52c3f88",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -213,7 +213,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 15,
|
||||||
"id": "1da5d625-a4ba-49f8-8314-cc9e0f4ef96a",
|
"id": "1da5d625-a4ba-49f8-8314-cc9e0f4ef96a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -240,7 +240,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 16,
|
||||||
"id": "1c97e396-8f05-4df7-bd0a-1bbecf6911b4",
|
"id": "1c97e396-8f05-4df7-bd0a-1bbecf6911b4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -250,7 +250,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 17,
|
||||||
"id": "55324c94-4944-4844-b00e-dc08c8989f7b",
|
"id": "55324c94-4944-4844-b00e-dc08c8989f7b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -269,7 +269,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 18,
|
||||||
"id": "c45959fe-3e23-4831-a41a-94c89892247f",
|
"id": "c45959fe-3e23-4831-a41a-94c89892247f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -304,7 +304,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 19,
|
||||||
"id": "bf14e3db-a1b4-4fad-8102-b7ac25feeefa",
|
"id": "bf14e3db-a1b4-4fad-8102-b7ac25feeefa",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -322,7 +322,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 40,
|
"execution_count": 20,
|
||||||
"id": "8ccc07fa-9913-4047-ae17-35d2454eb059",
|
"id": "8ccc07fa-9913-4047-ae17-35d2454eb059",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -335,67 +335,65 @@
|
|||||||
"1 STAR REVIEWS:\n",
|
"1 STAR REVIEWS:\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cant put gift card on dont like app\n",
|
"Once drivers confirm the rider its showing with in 2 minutes but they take 25 minutes more and if driver cancel the ride I pay for that in next ride it's redicules\n",
|
||||||
"(Length: 8.0 words)\n",
|
"(Length: 32.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Zapłaciłem za przejazd, uber pobral środki z mojego konta. Potem byla aktualizacja ceny na niższą i znowu kazał płacić. Teraz aplikacja zablokowała się na ekranie potwierdzenia płatności.\n",
|
"they charge very high as they show before the ride\n",
|
||||||
"(Length: 27.0 words)\n",
|
"(Length: 10.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The app hasn't been able to process any payment. Takes forever to find a ride. I don't even know why this app still exists. Absolutely useless!\n",
|
"scam drivers, worst service\n",
|
||||||
"(Length: 26.0 words)\n",
|
"(Length: 4.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"2 STAR REVIEWS:\n",
|
"2 STAR REVIEWS:\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In spite of receiving payment and acknowledging by email the app shows \n",
|
"Drivers always ask is destination and cancel if they dont want to go ? Cant they already see destination before accepting ride ?\n",
|
||||||
"payment due and disallowed booking and service not available to me. 4 days \n",
|
"(Length: 23.0 words)\n",
|
||||||
"have lapsed no solution to my problem. Problem solvi...\n",
|
|
||||||
"(Length: 37.0 words)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"Poor\n",
|
"she hole her phone on her hand she driving 80\n",
|
||||||
"(Length: 1.0 words)\n",
|
"(Length: 10.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"I had to reset my password and now I cant get in. Its telling me that my phone number is already in use. I need this fixed\n",
|
"I7u.6f هنه\n",
|
||||||
"(Length: 27.0 words)\n",
|
"(Length: 2.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"3 STAR REVIEWS:\n",
|
"3 STAR REVIEWS:\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Nice\n",
|
"I think this app is very important to me\n",
|
||||||
|
"(Length: 9.0 words)\n",
|
||||||
|
"\n",
|
||||||
|
"Ok\n",
|
||||||
"(Length: 1.0 words)\n",
|
"(Length: 1.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Good rides\n",
|
"The rate will be one while booking and after the ride it changes if asked the reason is due to traffic, but it should be mentioned first only, the destination time is also calculated by uber only alon...\n",
|
||||||
"(Length: 2.0 words)\n",
|
"(Length: 53.0 words)\n",
|
||||||
"\n",
|
|
||||||
"Nice\n",
|
|
||||||
"(Length: 1.0 words)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"4 STAR REVIEWS:\n",
|
"4 STAR REVIEWS:\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Good service\n",
|
"Nice\n",
|
||||||
"(Length: 2.0 words)\n",
|
"(Length: 1.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"A mobile number of the car driver should be an icon if Uber book for any other person, then it can be given the number.\n",
|
"Good app but sometimes it take long time to get booking even the cabs are nearby and sometimes they even cancel the ride after confirming and making us wait for 30 mins and above\n",
|
||||||
"(Length: 25.0 words)\n",
|
"(Length: 34.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"many times pick up locations is shifted automatically . overall good much better\n",
|
"its good and helpful.. Thank you\n",
|
||||||
"(Length: 13.0 words)\n",
|
"(Length: 6.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"5 STAR REVIEWS:\n",
|
"5 STAR REVIEWS:\n",
|
||||||
"==========================================\n",
|
"==========================================\n",
|
||||||
"\n",
|
"\n",
|
||||||
"So friendly. Thank you\n",
|
"good service\n",
|
||||||
"(Length: 4.0 words)\n",
|
"(Length: 2.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"comfortable journey with effodable price\n",
|
"Drivers have been getting us home quickly and effectively.\n",
|
||||||
"(Length: 5.0 words)\n",
|
"(Length: 9.0 words)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Good\n",
|
"Hbby\n",
|
||||||
"(Length: 1.0 words)\n"
|
"(Length: 1.0 words)\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -419,6 +417,22 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "87a15e76-51c8-4586-acea-ca3176c18757",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "73c4bbb9-3f8e-4b4c-8538-539b140cf610",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -426,6 +440,18 @@
|
|||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.14.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
# TODO: Fix get_stratified_sample() replace broken x() with actual working logic
|
|
||||||
# TODO: Add verification comparison between ratings
|
# TODO: Add verification comparison between ratings
|
||||||
# TODO: implement sample_with_keywords() add to lists, and implement logic
|
|
||||||
# TODO: Clean up the logging print statements
|
# TODO: Clean up the logging print statements
|
||||||
|
|
||||||
|
|
||||||
@@ -178,9 +176,14 @@ class Sampler:
|
|||||||
# Drop helper columns
|
# Drop helper columns
|
||||||
keyword_sample = keyword_sample.drop(columns=['likely_bug', 'likely_feature'])
|
keyword_sample = keyword_sample.drop(columns=['likely_bug', 'likely_feature'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print(f"\n Total samples: {len(keyword_sample):,}")
|
print(f"\n Total samples: {len(keyword_sample):,}")
|
||||||
return keyword_sample
|
return keyword_sample
|
||||||
|
|
||||||
|
def sample_tiny_size(self):
|
||||||
|
mini_sample = self.data.sample(200) # reading some samples manually
|
||||||
|
return mini_sample
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -211,7 +214,7 @@ def main():
|
|||||||
print("2. original_distribution_sample() stratified by the original data distribution")
|
print("2. original_distribution_sample() stratified by the original data distribution")
|
||||||
print("3. get_keyword_boosted_sample() stratified using original distribution but also using a keyword dictionary")
|
print("3. get_keyword_boosted_sample() stratified using original distribution but also using a keyword dictionary")
|
||||||
|
|
||||||
choice = input("\nEnter choice (1-3): ").strip()
|
choice = input("\nEnter choice (1-4): ").strip()
|
||||||
|
|
||||||
if choice == '1':
|
if choice == '1':
|
||||||
sample = sampler.get_stratified_sample()
|
sample = sampler.get_stratified_sample()
|
||||||
@@ -225,6 +228,10 @@ def main():
|
|||||||
sample = sampler.sample_with_keywords()
|
sample = sampler.sample_with_keywords()
|
||||||
sampler.save_sample(sample, "multitag/data/uber_reviews_sampled.csv")
|
sampler.save_sample(sample, "multitag/data/uber_reviews_sampled.csv")
|
||||||
|
|
||||||
|
elif choice == '4':
|
||||||
|
sample = sampler.sample_tiny_size()
|
||||||
|
sampler.save_sample(sample,"multitag/data/uber_review_temp.csv")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 1,
|
||||||
"id": "739e61bf",
|
"id": "739e61bf",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 2,
|
||||||
"id": "d9da1b98",
|
"id": "d9da1b98",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -55,7 +55,7 @@
|
|||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>very cordial.. and helped with a quick turnaro...</td>\n",
|
" <td>very cordial. and helped with a quick turnarou...</td>\n",
|
||||||
" <td>5</td>\n",
|
" <td>5</td>\n",
|
||||||
" <td>11</td>\n",
|
" <td>11</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
@@ -84,13 +84,13 @@
|
|||||||
"text/plain": [
|
"text/plain": [
|
||||||
" review rating word_count\n",
|
" review rating word_count\n",
|
||||||
"0 suddenly, the driver can't have my location an... 1 23\n",
|
"0 suddenly, the driver can't have my location an... 1 23\n",
|
||||||
"1 very cordial.. and helped with a quick turnaro... 5 11\n",
|
"1 very cordial. and helped with a quick turnarou... 5 11\n",
|
||||||
"2 i have enjoyed traveling by uber my drivers ha... 5 23\n",
|
"2 i have enjoyed traveling by uber my drivers ha... 5 23\n",
|
||||||
"3 app is good but main problem is the drivers ca... 1 23\n",
|
"3 app is good but main problem is the drivers ca... 1 23\n",
|
||||||
"4 very bad experience no customer service 1 6"
|
"4 very bad experience no customer service 1 6"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 5,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -102,7 +102,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 3,
|
||||||
"id": "91dc1d9a",
|
"id": "91dc1d9a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -112,7 +112,7 @@
|
|||||||
"np.int64(6740)"
|
"np.int64(6740)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 12,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -123,7 +123,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 4,
|
||||||
"id": "827b6435",
|
"id": "827b6435",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -132,7 +132,7 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Max length review:\n",
|
"Max length review:\n",
|
||||||
"i've been using uber for a few years now and for the most part haven't had any problems the only problem i have ever had that was never resolved or explained to me was when i was refunded cancellation fee because the driver pulled into me and then literally looked in my face and drove away and canceled the ride.buy still charges me.a cancellation fee the problem wasn't being charged the fee because i was reimbursed a few minutes later the problem was that they gave me a $5 uber credit and i don't have a debit card so i use uber gift cards so i had a balance on the gift card of roughly $4.85 and is an uber credit of $5 but for some reason you can't combine them to use on a single gel ride and there is a minimum of i think $6 or $7 for a ride so i was never able to use the money i was reimbursed and the remaing balance on my gift card was not enough for a full ride and that was 4 months ago and still have not been able to get a response as to how i can use the funds remaining mn my uber account or the uber credits i was reimbursed.... another time the driver took between 7-10 minutes to set the destination and begin driving and then when i was trying to help her with the directions because the gps was bringing us rather far out of the way to reach out destination she was very rude with me and then when we were approaching the turn that we needed to make and i warned her that it was coming up shortly and then said a few more times as we got closer to it she drove passed it the reason i was making sure she was aware of this was because we were in a rather busy highway and she would not be able to turn again for quite some time and i was already cutting it close to being late for work due to the delay in starting the trip and her failure to follow the direction i was giving her to get to.our destination so i said pull over into.the gas station and i will walk it'll be faster then driving at this point but she failed to pull into parking lot she just stopped in the middle.lf the highway granted we were in the right lane but she still put us both in a very dangerous situation and we were in a bad section of newark nj at around 10pm which for most people would put them in an uncomfortable position seeing how as the crime rate is extremely high in this area luckily i'm familiar with the residence of the neighborhood because i lived there for a few years in the past and then she notified uber that i was a disgruntled passenger and made her nervous from my reaction and in was warned to not let anything like this happen again... and the final problem.i had with uber was when my mother had ordered me.an uber through her account and during the ride we some.how started to discuss religion and i'm far from a religious person i do not even consider myself a member of any type of religion at all. but my driver was very dedicated to the religion he practiced so he got very upset and decided to call mother and tell her that i didn't not go the full distance of the original route he was hired to drive and that he had dropped me off a few blocks from where j was going to judge in some illegal activities (he knew i had some personal issues i was overcoming recently because i had mention it to him briefly in the beginning of the trip) and that he was not going to charge her the full ride amount because i did. ot cimplete the full distance and this caused my mother to become very upset with me and called me extremely untilni sent her a picture showing that i was in fact where i said i was going and the app showed that he did drop me off at the predetermined destination.. sorry for the long post but out of the probably 100 or so ride i have used with uber those were the only 3 problems i have ever had and they all 3 were actually in the same week\n"
|
"i've been using uber for a few years now and for the most part haven't had any problems the only problem i have ever had that was never resolved or explained to me was when i was refunded cancellation fee because the driver pulled into me and then literally looked in my face and drove away and canceled the ride.buy still charges me.a cancellation fee the problem wasn't being charged the fee because i was reimbursed a few minutes later the problem was that they gave me a $5 uber credit and i don't have a debit card so i use uber gift cards so i had a balance on the gift card of roughly $4.85 and is an uber credit of $5 but for some reason you can't combine them to use on a single gel ride and there is a minimum of i think $6 or $7 for a ride so i was never able to use the money i was reimbursed and the remaing balance on my gift card was not enough for a full ride and that was 4 months ago and still have not been able to get a response as to how i can use the funds remaining mn my uber account or the uber credits i was reimbursed. another time the driver took between 7-10 minutes to set the destination and begin driving and then when i was trying to help her with the directions because the gps was bringing us rather far out of the way to reach out destination she was very rude with me and then when we were approaching the turn that we needed to make and i warned her that it was coming up shortly and then said a few more times as we got closer to it she drove passed it the reason i was making sure she was aware of this was because we were in a rather busy highway and she would not be able to turn again for quite some time and i was already cutting it close to being late for work due to the delay in starting the trip and her failure to follow the direction i was giving her to get to.our destination so i said pull over into.the gas station and i will walk it'll be faster then driving at this point but she failed to pull into parking lot she just stopped in the middle.lf the highway granted we were in the right lane but she still put us both in a very dangerous situation and we were in a bad section of newark nj at around 10pm which for most people would put them in an uncomfortable position seeing how as the crime rate is extremely high in this area luckily i'm familiar with the residence of the neighborhood because i lived there for a few years in the past and then she notified uber that i was a disgruntled passenger and made her nervous from my reaction and in was warned to not let anything like this happen again. and the final problem.i had with uber was when my mother had ordered me.an uber through her account and during the ride we some.how started to discuss religion and i'm far from a religious person i do not even consider myself a member of any type of religion at all. but my driver was very dedicated to the religion he practiced so he got very upset and decided to call mother and tell her that i didn't not go the full distance of the original route he was hired to drive and that he had dropped me off a few blocks from where j was going to judge in some illegal activities (he knew i had some personal issues i was overcoming recently because i had mention it to him briefly in the beginning of the trip) and that he was not going to charge her the full ride amount because i did. ot cimplete the full distance and this caused my mother to become very upset with me and called me extremely untilni sent her a picture showing that i was in fact where i said i was going and the app showed that he did drop me off at the predetermined destination. sorry for the long post but out of the probably 100 or so ride i have used with uber those were the only 3 problems i have ever had and they all 3 were actually in the same week\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -142,17 +142,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 5,
|
||||||
"id": "7a811e3d",
|
"id": "7a811e3d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"np.int64(11226)"
|
"np.int64(2839)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 18,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@@ -165,7 +165,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 6,
|
||||||
"id": "0a550434",
|
"id": "0a550434",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -175,7 +175,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": 7,
|
||||||
"id": "ec7b2ec5",
|
"id": "ec7b2ec5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -192,7 +192,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 8,
|
||||||
"id": "d68dac67",
|
"id": "d68dac67",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -202,7 +202,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 9,
|
||||||
"id": "9a8a49b2",
|
"id": "9a8a49b2",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -213,22 +213,26 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"Language distribution in 10+ word reviews:\n",
|
"Language distribution in 10+ word reviews:\n",
|
||||||
"lang\n",
|
"lang\n",
|
||||||
"en 960\n",
|
"en 939\n",
|
||||||
"es 11\n",
|
"id 12\n",
|
||||||
"id 7\n",
|
"es 10\n",
|
||||||
"pt 6\n",
|
"pt 9\n",
|
||||||
"sv 2\n",
|
"ar 7\n",
|
||||||
"sw 2\n",
|
"ru 5\n",
|
||||||
"ar 2\n",
|
"fr 3\n",
|
||||||
"ro 2\n",
|
"af 2\n",
|
||||||
"ta 1\n",
|
"no 2\n",
|
||||||
"bn 1\n",
|
|
||||||
"nl 1\n",
|
|
||||||
"da 1\n",
|
|
||||||
"so 1\n",
|
|
||||||
"ru 1\n",
|
|
||||||
"et 1\n",
|
"et 1\n",
|
||||||
"af 1\n",
|
"ca 1\n",
|
||||||
|
"hi 1\n",
|
||||||
|
"tr 1\n",
|
||||||
|
"cs 1\n",
|
||||||
|
"bn 1\n",
|
||||||
|
"sv 1\n",
|
||||||
|
"it 1\n",
|
||||||
|
"so 1\n",
|
||||||
|
"ro 1\n",
|
||||||
|
"da 1\n",
|
||||||
"Name: count, dtype: int64\n"
|
"Name: count, dtype: int64\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -245,6 +249,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "fd7576df-ce92-4c30-8466-34274290a934",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -263,7 +275,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.13.9"
|
"version": "3.14.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Reference in New Issue
Block a user