Replace node2vecTA.ipynb

parent 5b9fa710
......@@ -9,7 +9,7 @@
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"data = pd.read_csv(\"men-products.csv\", delimiter=',', index_col=0)"
"data = pd.read_csv(\"C:/Users/Agusti Frananda/Documents/PROYEK/myntra-mens-product-dataset/men-products.csv\", delimiter=',', index_col=0)"
]
},
{
......@@ -24,18 +24,16 @@
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 61456 entries, 1 to 61503\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 NAME 61455 non-null object\n",
" 1 CATEGORY 61456 non-null object\n",
" 2 DESCRIPTION & COLOR 61456 non-null object\n",
" 3 FABRIC 56623 non-null object\n",
" 4 IMAGE 61456 non-null object\n",
" 5 SIZE 57618 non-null object\n",
" 6 PRICE 61456 non-null object\n",
" 7 PRODUCT ID 61456 non-null int64 \n",
" 8 WEBSITE 61456 non-null object\n",
" 9 PRODUCT URL 61456 non-null object\n",
"NAME 61455 non-null object\n",
"CATEGORY 61456 non-null object\n",
"DESCRIPTION & COLOR 61456 non-null object\n",
"FABRIC 56623 non-null object\n",
"IMAGE 61456 non-null object\n",
"SIZE 57618 non-null object\n",
"PRICE 61456 non-null object\n",
"PRODUCT ID 61456 non-null int64\n",
"WEBSITE 61456 non-null object\n",
"PRODUCT URL 61456 non-null object\n",
"dtypes: int64(1), object(9)\n",
"memory usage: 5.2+ MB\n"
]
......@@ -77,52 +75,52 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>NAME</th>\n",
" <td>NAME</td>\n",
" <td>1</td>\n",
" <td>0.005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CATEGORY</th>\n",
" <td>CATEGORY</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DESCRIPTION &amp; COLOR</th>\n",
" <td>DESCRIPTION &amp; COLOR</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FABRIC</th>\n",
" <td>FABRIC</td>\n",
" <td>4833</td>\n",
" <td>24.165</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IMAGE</th>\n",
" <td>IMAGE</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SIZE</th>\n",
" <td>SIZE</td>\n",
" <td>3838</td>\n",
" <td>19.190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRICE</th>\n",
" <td>PRICE</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRODUCT ID</th>\n",
" <td>PRODUCT ID</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WEBSITE</th>\n",
" <td>WEBSITE</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRODUCT URL</th>\n",
" <td>PRODUCT URL</td>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
......@@ -161,8 +159,263 @@
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>CATEGORY</th>\n",
" <th>DESCRIPTION &amp; COLOR</th>\n",
" <th>FABRIC</th>\n",
" <th>IMAGE</th>\n",
" <th>SIZE</th>\n",
" <th>PRICE</th>\n",
" <th>PRODUCT ID</th>\n",
" <th>WEBSITE</th>\n",
" <th>PRODUCT URL</th>\n",
" </tr>\n",
" <tr>\n",
" <th>SERIAL NO</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>U.S. Polo Assn. Men Brown Genuine Leather Two ...</td>\n",
" <td>accessories</td>\n",
" <td>U.S. Polo Assn. Men Brown Genuine Leather Two ...</td>\n",
" <td>Genuine leather</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Height: 11.5 cm</td>\n",
" <td>809</td>\n",
" <td>1943420</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/wallets/us-polo-assn/us...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Baggit Men Black Solid Two Fold Wallet</td>\n",
" <td>accessories</td>\n",
" <td>Baggit Men Black Solid Two Fold Wallet, Baggi...</td>\n",
" <td>PU</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Height:</td>\n",
" <td>720</td>\n",
" <td>4608404</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/wallets/baggit/baggit-m...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>HRX by Hrithik Roshan Men Grey Solid Baseball Cap</td>\n",
" <td>accessories</td>\n",
" <td>HRX By Hrithik Roshan Men Grey Solid Baseball ...</td>\n",
" <td>NaN</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>NaN</td>\n",
" <td>279</td>\n",
" <td>2178513</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/caps/hrx-by-hrithik-ros...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Puma Unisex Grey Style Military Solid Baseball...</td>\n",
" <td>accessories</td>\n",
" <td>Puma Unisex Grey Style Military Solid Baseball...</td>\n",
" <td>NaN</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>NaN</td>\n",
" <td>499</td>\n",
" <td>6699035</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/caps/puma/puma-unisex-g...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>FabSeasons Beige Solid Scarf</td>\n",
" <td>accessories</td>\n",
" <td>FabSeasons Beige Solid Scarf, FabSeasons, Scar...</td>\n",
" <td>Acrylic</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Length:0.9 m</td>\n",
" <td>449</td>\n",
" <td>2439658</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/scarves/fabseasons/fabs...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>Ed Hardy Men Black Embellished Belt</td>\n",
" <td>accessories</td>\n",
" <td>Ed Hardy Men Black Embellished Belt, Ed Hardy...</td>\n",
" <td>Leather</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Width: 3.7 cm</td>\n",
" <td>1199</td>\n",
" <td>2238752</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/belts/ed-hardy/ed-hardy...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>Roadster Men Tan Brown Leather Belt</td>\n",
" <td>accessories</td>\n",
" <td>Roadster Men Tan Brown Leather Belt, Roadster,...</td>\n",
" <td>Leather</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Width: 4 cm</td>\n",
" <td>419</td>\n",
" <td>2975974</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/belts/roadster/roadster...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>Peora Silver-Toned Rhodium-Plated Stone-Studde...</td>\n",
" <td>accessories</td>\n",
" <td>Peora Silver Toned Rhodium Plated Stone Studde...</td>\n",
" <td>NaN</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>NaN</td>\n",
" <td>551</td>\n",
" <td>3006095</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/ring/peora/peora-silver...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>Royal Enfield Unisex White Urban Trooper Helme...</td>\n",
" <td>accessories</td>\n",
" <td>Royal Enfield Unisex White Urban Trooper Helme...</td>\n",
" <td>NaN</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>NaN</td>\n",
" <td>3500</td>\n",
" <td>2242802</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/helmets/royal-enfield/r...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>BuckleUp Men Black Leather Belt</td>\n",
" <td>accessories</td>\n",
" <td>BuckleUp Men Black Leather Belt, BuckleUp, Bel...</td>\n",
" <td>Leather</td>\n",
" <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
" <td>Width: 3.5 cm</td>\n",
" <td>517</td>\n",
" <td>1734718</td>\n",
" <td>Myntra</td>\n",
" <td>https://www.myntra.com/belts/buckleup/buckleup...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"0"
" NAME CATEGORY \\\n",
"SERIAL NO \n",
"1 U.S. Polo Assn. Men Brown Genuine Leather Two ... accessories \n",
"2 Baggit Men Black Solid Two Fold Wallet accessories \n",
"3 HRX by Hrithik Roshan Men Grey Solid Baseball Cap accessories \n",
"4 Puma Unisex Grey Style Military Solid Baseball... accessories \n",
"5 FabSeasons Beige Solid Scarf accessories \n",
"6 Ed Hardy Men Black Embellished Belt accessories \n",
"7 Roadster Men Tan Brown Leather Belt accessories \n",
"8 Peora Silver-Toned Rhodium-Plated Stone-Studde... accessories \n",
"9 Royal Enfield Unisex White Urban Trooper Helme... accessories \n",
"10 BuckleUp Men Black Leather Belt accessories \n",
"\n",
" DESCRIPTION & COLOR \\\n",
"SERIAL NO \n",
"1 U.S. Polo Assn. Men Brown Genuine Leather Two ... \n",
"2 Baggit Men Black Solid Two Fold Wallet, Baggi... \n",
"3 HRX By Hrithik Roshan Men Grey Solid Baseball ... \n",
"4 Puma Unisex Grey Style Military Solid Baseball... \n",
"5 FabSeasons Beige Solid Scarf, FabSeasons, Scar... \n",
"6 Ed Hardy Men Black Embellished Belt, Ed Hardy... \n",
"7 Roadster Men Tan Brown Leather Belt, Roadster,... \n",
"8 Peora Silver Toned Rhodium Plated Stone Studde... \n",
"9 Royal Enfield Unisex White Urban Trooper Helme... \n",
"10 BuckleUp Men Black Leather Belt, BuckleUp, Bel... \n",
"\n",
" FABRIC \\\n",
"SERIAL NO \n",
"1 Genuine leather \n",
"2 PU \n",
"3 NaN \n",
"4 NaN \n",
"5 Acrylic \n",
"6 Leather \n",
"7 Leather \n",
"8 NaN \n",
"9 NaN \n",
"10 Leather \n",
"\n",
" IMAGE SIZE \\\n",
"SERIAL NO \n",
"1 https://assets.myntassets.com/h_1440,q_100,w_1... Height: 11.5 cm \n",
"2 https://assets.myntassets.com/h_1440,q_100,w_1... Height: \n",
"3 https://assets.myntassets.com/h_1440,q_100,w_1... NaN \n",
"4 https://assets.myntassets.com/h_1440,q_100,w_1... NaN \n",
"5 https://assets.myntassets.com/h_1440,q_100,w_1... Length:0.9 m \n",
"6 https://assets.myntassets.com/h_1440,q_100,w_1... Width: 3.7 cm \n",
"7 https://assets.myntassets.com/h_1440,q_100,w_1... Width: 4 cm \n",
"8 https://assets.myntassets.com/h_1440,q_100,w_1... NaN \n",
"9 https://assets.myntassets.com/h_1440,q_100,w_1... NaN \n",
"10 https://assets.myntassets.com/h_1440,q_100,w_1... Width: 3.5 cm \n",
"\n",
" PRICE PRODUCT ID WEBSITE \\\n",
"SERIAL NO \n",
"1 809 1943420 Myntra \n",
"2 720 4608404 Myntra \n",
"3 279 2178513 Myntra \n",
"4 499 6699035 Myntra \n",
"5 449 2439658 Myntra \n",
"6 1199 2238752 Myntra \n",
"7 419 2975974 Myntra \n",
"8 551 3006095 Myntra \n",
"9 3500 2242802 Myntra \n",
"10 517 1734718 Myntra \n",
"\n",
" PRODUCT URL \n",
"SERIAL NO \n",
"1 https://www.myntra.com/wallets/us-polo-assn/us... \n",
"2 https://www.myntra.com/wallets/baggit/baggit-m... \n",
"3 https://www.myntra.com/caps/hrx-by-hrithik-ros... \n",
"4 https://www.myntra.com/caps/puma/puma-unisex-g... \n",
"5 https://www.myntra.com/scarves/fabseasons/fabs... \n",
"6 https://www.myntra.com/belts/ed-hardy/ed-hardy... \n",
"7 https://www.myntra.com/belts/roadster/roadster... \n",
"8 https://www.myntra.com/ring/peora/peora-silver... \n",
"9 https://www.myntra.com/helmets/royal-enfield/r... \n",
"10 https://www.myntra.com/belts/buckleup/buckleup... "
]
},
"execution_count": 4,
......@@ -171,13 +424,33 @@
}
],
"source": [
"data.duplicated().sum()"
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.duplicated().sum()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"del data['FABRIC']\n",
......@@ -191,7 +464,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -200,7 +473,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
......@@ -209,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
......@@ -228,7 +501,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [
{
......@@ -265,31 +538,31 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25553</th>\n",
" <td>25553</td>\n",
" <td>Fort Collins Men Red Solid Padded Jacket</td>\n",
" <td>Men Jackets Coats</td>\n",
" <td>Fort Collins Men Red Solid Padded Jacket, For...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18640</th>\n",
" <td>18640</td>\n",
" <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
" <td>Men Formal Trousers</td>\n",
" <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18543</th>\n",
" <td>18543</td>\n",
" <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
" <td>Men Formal Trousers</td>\n",
" <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21475</th>\n",
" <td>21475</td>\n",
" <td>Hanes Charcoal Grey Thermal T-Shirt</td>\n",
" <td>Innerwear &amp; Sleapwear</td>\n",
" <td>Hanes Charcoal Grey Thermal T Shirt, Hanes, T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14859</th>\n",
" <td>14859</td>\n",
" <td>Hancock Men Blue Regular Fit Striped Formal Shirt</td>\n",
" <td>Formal Shirts</td>\n",
" <td>Hancock Men Blue Regular Fit Striped Formal Sh...</td>\n",
......@@ -324,7 +597,7 @@
"14859 Hancock Men Blue Regular Fit Striped Formal Sh... "
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
......@@ -335,7 +608,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
......@@ -345,7 +618,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"outputs": [
{
......@@ -367,9 +640,18 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:17: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
" from collections import Mapping, defaultdict\n"
]
}
],
"source": [
"import re\n",
"from nltk.corpus import stopwords\n",
......@@ -401,7 +683,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -421,7 +703,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
......@@ -430,7 +712,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
......@@ -445,7 +727,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
......@@ -454,7 +736,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"outputs": [
{
......@@ -1463,7 +1745,7 @@
" ...]"
]
},
"execution_count": 17,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1474,7 +1756,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
......@@ -1487,7 +1769,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"metadata": {},
"outputs": [
{
......@@ -1504,14 +1786,14 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Computing transition probabilities: 100%|█████████████████████████████████████████| 1508/1508 [00:05<00:00, 292.23it/s]\n"
"Computing transition probabilities: 100%|██████████| 1508/1508 [00:08<00:00, 183.21it/s]\n"
]
},
{
......@@ -1531,14 +1813,14 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\User\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
......@@ -1546,14 +1828,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
"caffein\n",
"retro\n",
"transpar\n",
"assassin\n",
"junior\n",
"retro\n",
"rubber\n",
"pace\n",
"assassin\n",
"emoji\n"
"digit\n",
"transpar\n",
"caffein\n",
"collect\n",
"creed\n"
]
}
],
......@@ -1565,14 +1848,14 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\User\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
......@@ -1581,14 +1864,13 @@
"output_type": "stream",
"text": [
"tshirt\n",
"proplanet\n",
"signatur\n",
"alci\n",
"stardust\n",
"homm\n",
"price\n",
"bonati\n",
"greenturn\n",
"nautica\n",
"fritzberg\n"
"tantra\n",
"naresh\n",
"statement\n"
]
}
],
......@@ -1600,14 +1882,14 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\User\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
......@@ -1616,14 +1898,14 @@
"output_type": "stream",
"text": [
"price\n",
"dennison\n",
"oxolloxo\n",
"southbay\n",
"ivoc\n",
"guess\n",
"excalibur\n",
"rigo\n",
"smokestack\n",
"zeal\n"
"dazzio\n",
"oxolloxo\n",
"dilling\n",
"rasm\n",
"dennison\n"
]
}
],
......@@ -1636,7 +1918,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"metadata": {},
"outputs": [
{
......@@ -1644,13 +1926,14 @@
"output_type": "stream",
"text": [
"bottomwear\n",
"alvaro\n",
"loungewear\n",
"enfield\n",
"apparel\n",
"headwear\n",
"peter\n",
"nightwear\n",
"pacif\n"
"royal\n",
"mumf\n",
"alvaro\n",
"underwear\n",
"robe\n"
]
}
],
......@@ -1664,14 +1947,14 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\User\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py:877: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py:877: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
" vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)\n"
]
},
......@@ -1679,7 +1962,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"men\n"
"black\n"
]
}
],
......@@ -1689,25 +1972,25 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('rubber', 1.6524951457977295),\n",
" ('pro', 1.6506301164627075),\n",
" ('cncpt', 1.6447763442993164),\n",
" ('nsw', 1.6380010843276978),\n",
" ('brt', 1.5377882719039917),\n",
" ('biofus', 1.5276793241500854),\n",
" ('text', 1.4686168432235718),\n",
" ('shimmer', 1.4551149606704712),\n",
" ('gsw', 1.4540029764175415),\n",
" ('floyd', 1.447547435760498)]"
"[('pro', 1.9804141521453857),\n",
" ('nsw', 1.8536335229873657),\n",
" ('brt', 1.6897633075714111),\n",
" ('thoroughbr', 1.641608715057373),\n",
" ('fcb', 1.6144654750823975),\n",
" ('scoop', 1.5819679498672485),\n",
" ('cncpt', 1.5131385326385498),\n",
" ('spanish', 1.4589388370513916),\n",
" ('caffein', 1.4484915733337402),\n",
" ('stad', 1.440345287322998)]"
]
},
"execution_count": 26,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1718,16 +2001,16 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
"0.99999994"
]
},
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1738,16 +2021,16 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.38277367"
"0.33650184"
]
},
"execution_count": 28,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1758,7 +2041,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 30,
"metadata": {},
"outputs": [
{
......@@ -1767,7 +2050,7 @@
"'white'"
]
},
"execution_count": 29,
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1775,6 +2058,372 @@
"source": [
"model.wv.doesnt_match([\"brown\", \"white\", \"black\"])"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.32085222"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.wv.similarity(w1=\"black\", w2=\"white\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.27853268"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.wv.similarity(w1=\"white\", w2=\"brown\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bottomwear\n",
"loungewear\n",
"enfield\n",
"apparel\n",
"royal\n",
"mumf\n",
"alvaro\n",
"underwear\n",
"robe\n"
]
}
],
"source": [
"# Query the KeyedVectors (`model.wv`) directly: `model.most_similar` is\n",
"# deprecated and its own warning says to use `model.wv.most_similar`.\n",
"for node, _ in model.wv.most_similar('best'):\n",
"    # print only the node labels longer than three characters\n",
"    if len(node) > 3:\n",
"        print(node)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8185"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"descriptions = data1[\"DESCRIPTION\"].unique().tolist()\n",
"len(descriptions)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"# Shuffle the unique descriptions with a fixed seed so that the\n",
"# train/validation split is the same after Restart & Run All.\n",
"random.Random(42).shuffle(descriptions)\n",
"\n",
"# the first 90% of the shuffled descriptions form the training set\n",
"n_train = round(0.9 * len(descriptions))\n",
"descriptions_train = descriptions[:n_train]\n",
"\n",
"# split the rows into train and validation sets by description;\n",
"# the membership mask is computed once and negated for validation\n",
"is_train = data1['DESCRIPTION'].isin(descriptions_train)\n",
"train_df = data1[is_train]\n",
"validation_df = data1[~is_train]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"\n",
"# Group the CATEGORY values by DESCRIPTION once, instead of re-filtering\n",
"# the whole training frame for every description.\n",
"categories_by_description = (\n",
"    train_df.groupby(\"DESCRIPTION\")[\"CATEGORY\"].apply(list).to_dict()\n",
")\n",
"\n",
"# one list of categories per training description, in the order of\n",
"# descriptions_train; a description absent from the mapping gets []\n",
"products_train = [\n",
"    categories_by_description.get(description, [])\n",
"    for description in tqdm(descriptions_train)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Group the CATEGORY values by DESCRIPTION once for the validation rows,\n",
"# instead of re-filtering the whole frame for every description.\n",
"val_categories_by_description = (\n",
"    validation_df.groupby(\"DESCRIPTION\")[\"CATEGORY\"].apply(list).to_dict()\n",
")\n",
"\n",
"# one list of categories per unique validation description, in order of\n",
"# first appearance; a description absent from the mapping gets []\n",
"products_val = [\n",
"    val_categories_by_description.get(description, [])\n",
"    for description in tqdm(validation_df['DESCRIPTION'].unique())\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep one row per CATEGORY (its last occurrence). drop_duplicates returns\n",
"# a new frame here, so nothing is modified in place on a slice of data1\n",
"# and pandas has no reason to emit SettingWithCopyWarning.\n",
"products = data1[[\"CATEGORY\", \"NAME\"]].drop_duplicates(subset='CATEGORY', keep=\"last\")\n",
"\n",
"# map each CATEGORY to the list of NAME values kept for it\n",
"products_dict = products.groupby('CATEGORY')['NAME'].apply(list).to_dict()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Navy Blue Checked Casual Jacket']"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# test the dictionary\n",
"products_dict['Men Jackets Coats']"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Wills Lifestyle Men Grey Regular Fit Woollen Solid Formal Trousers']"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"products_dict['Men Formal Trousers']"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"def similar_products(v, n = 6):\n",
"    \"\"\"Return names and similarity scores of the nodes closest to a vector.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    v : vector to compare against the embeddings in `model.wv`.\n",
"    n : number of (name, score) pairs to return (default 6).\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of (name, similarity) tuples. The name is the first entry of\n",
"    `products_dict` for the node, or the node label itself when the node\n",
"    has no entry there.\n",
"    \"\"\"\n",
"    # Ask for n + 1 neighbours and drop the first hit, which is assumed\n",
"    # to be the query itself (true when `v` is the vector of a node).\n",
"    # `model.wv` is used because `model.similar_by_vector` is deprecated.\n",
"    neighbours = model.wv.similar_by_vector(v, topn=n + 1)[1:]\n",
"\n",
"    # Not every node has an entry in products_dict (a plain lookup raised\n",
"    # KeyError: 'smag'), so fall back to the node label.\n",
"    return [\n",
"        (products_dict.get(node, [node])[0], score)\n",
"        for node, score in neighbours\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Index the KeyedVectors directly; `model[...]` is deprecated in favour\n",
"# of `model.wv[...]`. Outputs cleared: re-run to regenerate them.\n",
"similar_products(model.wv['fort'])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"def aggregate_vectors(products):\n",
"    \"\"\"Average the embedding vectors of the given products.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    products : iterable of keys to look up in `model.wv`.\n",
"\n",
"    Returns\n",
"    -------\n",
"    The element-wise mean (axis 0) of the vectors that were found.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If none of the products is in the model vocabulary.\n",
"    \"\"\"\n",
"    product_vec = []\n",
"    for i in products:\n",
"        try:\n",
"            # `model.wv[...]` replaces the deprecated `model[...]`\n",
"            product_vec.append(model.wv[i])\n",
"        except KeyError:\n",
"            # products missing from the vocabulary are skipped\n",
"            continue\n",
"\n",
"    # np.mean([]) would return NaN with a RuntimeWarning, and passing that\n",
"    # NaN on to similar_by_vector fails with an unrelated TypeError.\n",
"    if not product_vec:\n",
"        raise ValueError(\"none of the given products is in the model vocabulary\")\n",
"\n",
"    return np.mean(product_vec, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(products_val[0])"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:5: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
" \"\"\"\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3118: RuntimeWarning: Mean of empty slice.\n",
" out=out, **kwargs)\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\numpy\\core\\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars\n",
" ret = ret.dtype.type(ret / rcount)\n",
"C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: DeprecationWarning: Call to deprecated `similar_by_vector` (Method will be removed in 4.0.0, use self.wv.similar_by_vector() instead).\n",
" after removing the cwd from sys.path.\n"
]
},
{
"ename": "TypeError",
"evalue": "cannot unpack non-iterable numpy.float64 object",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-48-0643cd97cf20>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msimilar_products\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maggregate_vectors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mproducts_val\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m<ipython-input-42-19f474f97eb7>\u001b[0m in \u001b[0;36msimilar_products\u001b[1;34m(v, n)\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# extract most similar products for the input vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msimilar_by_vector\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[1;31m# extract name and similarity score of the similar products\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\utils.py\u001b[0m in \u001b[0;36mnew_func1\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 1445\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1446\u001b[0m )\n\u001b[1;32m-> 1447\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1448\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1449\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mnew_func1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\base_any2vec.py\u001b[0m in \u001b[0;36msimilar_by_vector\u001b[1;34m(self, vector, topn, restrict_vocab)\u001b[0m\n\u001b[0;32m 1432\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1433\u001b[0m \"\"\"\n\u001b[1;32m-> 1434\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msimilar_by_vector\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvector\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1435\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1436\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mdeprecated\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Method will be removed in 4.0.0, use self.wv.doesnt_match() instead\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py\u001b[0m in \u001b[0;36msimilar_by_vector\u001b[1;34m(self, vector, topn, restrict_vocab)\u001b[0m\n\u001b[0;32m 620\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 621\u001b[0m \"\"\"\n\u001b[1;32m--> 622\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmost_similar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpositive\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvector\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtopn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 623\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 624\u001b[0m @deprecated(\n",
"\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py\u001b[0m in \u001b[0;36mmost_similar\u001b[1;34m(self, positive, negative, topn, restrict_vocab, indexer)\u001b[0m\n\u001b[0;32m 547\u001b[0m \u001b[1;31m# compute the weighted average of all words\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 548\u001b[0m \u001b[0mall_words\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmean\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m \u001b[1;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mpositive\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mnegative\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 550\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mndarray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 551\u001b[0m \u001b[0mmean\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweight\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mword\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: cannot unpack non-iterable numpy.float64 object"
]
}
],
"source": [
"similar_products(aggregate_vectors(products_val[0]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment