Delete node2vecTA.ipynb

7b7192f9 · Boas Demeson Pangaribuan · b7411b5c · b7411b5c
Commit 7b7192f9 authored Apr 09, 2020 by Boas Demeson Pangaribuan
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 2450 deletions

node2vecTA.ipynb Algoritma Node2vec/node2vecTA.ipynb +0 -2450

No files found.
--- a/Algoritma Node2vec/node2vecTA.ipynb
+++ b/Algoritma Node2vec/node2vecTA.ipynb
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "\n",
-    "data = pd.read_csv(\"C:/Users/Agusti Frananda/Documents/PROYEK/myntra-mens-product-dataset/men-products.csv\", delimiter=',', index_col=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "Int64Index: 61456 entries, 1 to 61503\n",
-      "Data columns (total 10 columns):\n",
-      "NAME                   61455 non-null object\n",
-      "CATEGORY               61456 non-null object\n",
-      "DESCRIPTION & COLOR    61456 non-null object\n",
-      "FABRIC                 56623 non-null object\n",
-      "IMAGE                  61456 non-null object\n",
-      "SIZE                   57618 non-null object\n",
-      "PRICE                  61456 non-null object\n",
-      "PRODUCT ID             61456 non-null int64\n",
-      "WEBSITE                61456 non-null object\n",
-      "PRODUCT URL            61456 non-null object\n",
-      "dtypes: int64(1), object(9)\n",
-      "memory usage: 5.2+ MB\n"
-     ]
-    }
-   ],
-   "source": [
-    "data.info()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>total_missing</th>\n",
-       "      <th>percent_missing</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>NAME</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0.005</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>CATEGORY</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>DESCRIPTION &amp; COLOR</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>FABRIC</td>\n",
-       "      <td>4833</td>\n",
-       "      <td>24.165</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>IMAGE</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>SIZE</td>\n",
-       "      <td>3838</td>\n",
-       "      <td>19.190</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>PRICE</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>PRODUCT ID</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>WEBSITE</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>PRODUCT URL</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0.000</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                     total_missing  percent_missing\n",
-       "NAME                             1            0.005\n",
-       "CATEGORY                         0            0.000\n",
-       "DESCRIPTION & COLOR              0            0.000\n",
-       "FABRIC                        4833           24.165\n",
-       "IMAGE                            0            0.000\n",
-       "SIZE                          3838           19.190\n",
-       "PRICE                            0            0.000\n",
-       "PRODUCT ID                       0            0.000\n",
-       "WEBSITE                          0            0.000\n",
-       "PRODUCT URL                      0            0.000"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Missing values per column: absolute count and share of all rows.\n",
-    "# The share is computed against the real row count of `data`; a fixed divisor\n",
-    "# would only be correct for a frame of exactly that many rows.\n",
-    "null_counts = data.isnull().sum()\n",
-    "missing_data = pd.DataFrame({\n",
-    "    'total_missing': null_counts,\n",
-    "    'percent_missing': null_counts / len(data) * 100,\n",
-    "})\n",
-    "missing_data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>CATEGORY</th>\n",
-       "      <th>DESCRIPTION &amp; COLOR</th>\n",
-       "      <th>FABRIC</th>\n",
-       "      <th>IMAGE</th>\n",
-       "      <th>SIZE</th>\n",
-       "      <th>PRICE</th>\n",
-       "      <th>PRODUCT ID</th>\n",
-       "      <th>WEBSITE</th>\n",
-       "      <th>PRODUCT URL</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>SERIAL NO</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>1</td>\n",
-       "      <td>U.S. Polo Assn. Men Brown Genuine Leather Two ...</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>U.S. Polo Assn. Men Brown Genuine Leather Two ...</td>\n",
-       "      <td>Genuine leather</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Height: 11.5 cm</td>\n",
-       "      <td>809</td>\n",
-       "      <td>1943420</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/wallets/us-polo-assn/us...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>2</td>\n",
-       "      <td>Baggit Men Black Solid Two Fold Wallet</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Baggit Men Black Solid Two Fold Wallet,  Baggi...</td>\n",
-       "      <td>PU</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Height:</td>\n",
-       "      <td>720</td>\n",
-       "      <td>4608404</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/wallets/baggit/baggit-m...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>3</td>\n",
-       "      <td>HRX by Hrithik Roshan Men Grey Solid Baseball Cap</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>HRX By Hrithik Roshan Men Grey Solid Baseball ...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>279</td>\n",
-       "      <td>2178513</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/caps/hrx-by-hrithik-ros...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>4</td>\n",
-       "      <td>Puma Unisex Grey Style Military Solid Baseball...</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Puma Unisex Grey Style Military Solid Baseball...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>499</td>\n",
-       "      <td>6699035</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/caps/puma/puma-unisex-g...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>5</td>\n",
-       "      <td>FabSeasons Beige Solid Scarf</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>FabSeasons Beige Solid Scarf, FabSeasons, Scar...</td>\n",
-       "      <td>Acrylic</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Length:0.9 m</td>\n",
-       "      <td>449</td>\n",
-       "      <td>2439658</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/scarves/fabseasons/fabs...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>6</td>\n",
-       "      <td>Ed Hardy Men Black Embellished Belt</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Ed Hardy Men Black Embellished Belt,  Ed Hardy...</td>\n",
-       "      <td>Leather</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Width: 3.7 cm</td>\n",
-       "      <td>1199</td>\n",
-       "      <td>2238752</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/belts/ed-hardy/ed-hardy...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>7</td>\n",
-       "      <td>Roadster Men Tan Brown Leather Belt</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Roadster Men Tan Brown Leather Belt, Roadster,...</td>\n",
-       "      <td>Leather</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Width: 4 cm</td>\n",
-       "      <td>419</td>\n",
-       "      <td>2975974</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/belts/roadster/roadster...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>8</td>\n",
-       "      <td>Peora Silver-Toned Rhodium-Plated Stone-Studde...</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Peora Silver Toned Rhodium Plated Stone Studde...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>551</td>\n",
-       "      <td>3006095</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/ring/peora/peora-silver...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>9</td>\n",
-       "      <td>Royal Enfield Unisex White Urban Trooper Helme...</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>Royal Enfield Unisex White Urban Trooper Helme...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>3500</td>\n",
-       "      <td>2242802</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/helmets/royal-enfield/r...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>10</td>\n",
-       "      <td>BuckleUp Men Black Leather Belt</td>\n",
-       "      <td>accessories</td>\n",
-       "      <td>BuckleUp Men Black Leather Belt, BuckleUp, Bel...</td>\n",
-       "      <td>Leather</td>\n",
-       "      <td>https://assets.myntassets.com/h_1440,q_100,w_1...</td>\n",
-       "      <td>Width: 3.5 cm</td>\n",
-       "      <td>517</td>\n",
-       "      <td>1734718</td>\n",
-       "      <td>Myntra</td>\n",
-       "      <td>https://www.myntra.com/belts/buckleup/buckleup...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                        NAME     CATEGORY  \\\n",
-       "SERIAL NO                                                                   \n",
-       "1          U.S. Polo Assn. Men Brown Genuine Leather Two ...  accessories   \n",
-       "2                     Baggit Men Black Solid Two Fold Wallet  accessories   \n",
-       "3          HRX by Hrithik Roshan Men Grey Solid Baseball Cap  accessories   \n",
-       "4          Puma Unisex Grey Style Military Solid Baseball...  accessories   \n",
-       "5                               FabSeasons Beige Solid Scarf  accessories   \n",
-       "6                        Ed Hardy Men Black Embellished Belt  accessories   \n",
-       "7                        Roadster Men Tan Brown Leather Belt  accessories   \n",
-       "8          Peora Silver-Toned Rhodium-Plated Stone-Studde...  accessories   \n",
-       "9          Royal Enfield Unisex White Urban Trooper Helme...  accessories   \n",
-       "10                           BuckleUp Men Black Leather Belt  accessories   \n",
-       "\n",
-       "                                         DESCRIPTION & COLOR  \\\n",
-       "SERIAL NO                                                      \n",
-       "1          U.S. Polo Assn. Men Brown Genuine Leather Two ...   \n",
-       "2          Baggit Men Black Solid Two Fold Wallet,  Baggi...   \n",
-       "3          HRX By Hrithik Roshan Men Grey Solid Baseball ...   \n",
-       "4          Puma Unisex Grey Style Military Solid Baseball...   \n",
-       "5          FabSeasons Beige Solid Scarf, FabSeasons, Scar...   \n",
-       "6          Ed Hardy Men Black Embellished Belt,  Ed Hardy...   \n",
-       "7          Roadster Men Tan Brown Leather Belt, Roadster,...   \n",
-       "8          Peora Silver Toned Rhodium Plated Stone Studde...   \n",
-       "9          Royal Enfield Unisex White Urban Trooper Helme...   \n",
-       "10         BuckleUp Men Black Leather Belt, BuckleUp, Bel...   \n",
-       "\n",
-       "                     FABRIC  \\\n",
-       "SERIAL NO                     \n",
-       "1          Genuine leather    \n",
-       "2                       PU    \n",
-       "3                       NaN   \n",
-       "4                       NaN   \n",
-       "5                  Acrylic    \n",
-       "6                  Leather    \n",
-       "7                   Leather   \n",
-       "8                       NaN   \n",
-       "9                       NaN   \n",
-       "10                  Leather   \n",
-       "\n",
-       "                                                       IMAGE             SIZE  \\\n",
-       "SERIAL NO                                                                       \n",
-       "1          https://assets.myntassets.com/h_1440,q_100,w_1...  Height: 11.5 cm   \n",
-       "2          https://assets.myntassets.com/h_1440,q_100,w_1...          Height:   \n",
-       "3          https://assets.myntassets.com/h_1440,q_100,w_1...              NaN   \n",
-       "4          https://assets.myntassets.com/h_1440,q_100,w_1...              NaN   \n",
-       "5          https://assets.myntassets.com/h_1440,q_100,w_1...     Length:0.9 m   \n",
-       "6          https://assets.myntassets.com/h_1440,q_100,w_1...    Width: 3.7 cm   \n",
-       "7          https://assets.myntassets.com/h_1440,q_100,w_1...      Width: 4 cm   \n",
-       "8          https://assets.myntassets.com/h_1440,q_100,w_1...              NaN   \n",
-       "9          https://assets.myntassets.com/h_1440,q_100,w_1...              NaN   \n",
-       "10         https://assets.myntassets.com/h_1440,q_100,w_1...    Width: 3.5 cm   \n",
-       "\n",
-       "          PRICE  PRODUCT ID WEBSITE  \\\n",
-       "SERIAL NO                             \n",
-       "1           809     1943420  Myntra   \n",
-       "2           720     4608404  Myntra   \n",
-       "3           279     2178513  Myntra   \n",
-       "4           499     6699035  Myntra   \n",
-       "5           449     2439658  Myntra   \n",
-       "6          1199     2238752  Myntra   \n",
-       "7           419     2975974  Myntra   \n",
-       "8           551     3006095  Myntra   \n",
-       "9          3500     2242802  Myntra   \n",
-       "10          517     1734718  Myntra   \n",
-       "\n",
-       "                                                 PRODUCT URL  \n",
-       "SERIAL NO                                                     \n",
-       "1          https://www.myntra.com/wallets/us-polo-assn/us...  \n",
-       "2          https://www.myntra.com/wallets/baggit/baggit-m...  \n",
-       "3          https://www.myntra.com/caps/hrx-by-hrithik-ros...  \n",
-       "4          https://www.myntra.com/caps/puma/puma-unisex-g...  \n",
-       "5          https://www.myntra.com/scarves/fabseasons/fabs...  \n",
-       "6          https://www.myntra.com/belts/ed-hardy/ed-hardy...  \n",
-       "7          https://www.myntra.com/belts/roadster/roadster...  \n",
-       "8          https://www.myntra.com/ring/peora/peora-silver...  \n",
-       "9          https://www.myntra.com/helmets/royal-enfield/r...  \n",
-       "10         https://www.myntra.com/belts/buckleup/buckleup...  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data.head(10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data.duplicated().sum()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Delete, in place, every column except NAME, CATEGORY and the description column.\n",
-    "for unused_column in ('FABRIC', 'IMAGE', 'SIZE', 'WEBSITE', 'PRODUCT URL', 'PRICE', 'PRODUCT ID'):\n",
-    "    del data[unused_column]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data.rename(columns = {'DESCRIPTION & COLOR':'DESCRIPTION'}, inplace = True) "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data1 = data.sample(10000, random_state=1).copy()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Rename the product categories: raw category slug -> display label.\n",
-    "# Categories that are not listed here keep their original value.\n",
-    "category_labels = {\n",
-    "    'accessories': 'Accesories',\n",
-    "    'casual-shirts': 'Casual Shirts',\n",
-    "    'Men-Casual-Trousers': 'Men Casual Trousers',\n",
-    "    'formal-shirts': 'Formal Shirts',\n",
-    "    'Men-Formal-Trousers': 'Men Formal Trousers',\n",
-    "    'men-jackets-coats': 'Men Jackets Coats',\n",
-    "    'men-swimwear': 'Men Swimwear',\n",
-    "    'men-suits': 'Men Suits',\n",
-    "}\n",
-    "data1.replace({'CATEGORY': category_labels}, inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>NAME</th>\n",
-       "      <th>CATEGORY</th>\n",
-       "      <th>DESCRIPTION</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>SERIAL NO</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <td>25553</td>\n",
-       "      <td>Fort Collins Men Red Solid Padded Jacket</td>\n",
-       "      <td>Men Jackets Coats</td>\n",
-       "      <td>Fort Collins Men Red Solid Padded Jacket,  For...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>18640</td>\n",
-       "      <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
-       "      <td>Men Formal Trousers</td>\n",
-       "      <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>18543</td>\n",
-       "      <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
-       "      <td>Men Formal Trousers</td>\n",
-       "      <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>21475</td>\n",
-       "      <td>Hanes Charcoal Grey Thermal T-Shirt</td>\n",
-       "      <td>Innerwear &amp; Sleapwear</td>\n",
-       "      <td>Hanes Charcoal Grey Thermal T Shirt,  Hanes, T...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <td>14859</td>\n",
-       "      <td>Hancock Men Blue Regular Fit Striped Formal Shirt</td>\n",
-       "      <td>Formal Shirts</td>\n",
-       "      <td>Hancock Men Blue Regular Fit Striped Formal Sh...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                        NAME  \\\n",
-       "SERIAL NO                                                      \n",
-       "25553               Fort Collins Men Red Solid Padded Jacket   \n",
-       "18640      MANGO MAN Men Navy Blue Tailored Slim Fit Soli...   \n",
-       "18543      Arrow Men Navy Blue Tapered Fit Checked Formal...   \n",
-       "21475                    Hanes Charcoal Grey Thermal T-Shirt   \n",
-       "14859      Hancock Men Blue Regular Fit Striped Formal Shirt   \n",
-       "\n",
-       "                        CATEGORY  \\\n",
-       "SERIAL NO                          \n",
-       "25553          Men Jackets Coats   \n",
-       "18640        Men Formal Trousers   \n",
-       "18543        Men Formal Trousers   \n",
-       "21475      Innerwear & Sleapwear   \n",
-       "14859              Formal Shirts   \n",
-       "\n",
-       "                                                 DESCRIPTION  \n",
-       "SERIAL NO                                                     \n",
-       "25553      Fort Collins Men Red Solid Padded Jacket,  For...  \n",
-       "18640      MANGO MAN Men Navy Blue Tailored Slim Fit Soli...  \n",
-       "18543      Arrow Men Navy Blue Tapered Fit Checked Formal...  \n",
-       "21475      Hanes Charcoal Grey Thermal T Shirt,  Hanes, T...  \n",
-       "14859      Hancock Men Blue Regular Fit Striped Formal Sh...  "
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data1.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Descriptions of the sampled products, re-indexed 0..n-1 in sample order.\n",
-    "text_total = data1['DESCRIPTION'].reset_index(drop=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0    Fort Collins Men Red Solid Padded Jacket,  For...\n",
-      "1    MANGO MAN Men Navy Blue Tailored Slim Fit Soli...\n",
-      "2    Arrow Men Navy Blue Tapered Fit Checked Formal...\n",
-      "3    Hanes Charcoal Grey Thermal T Shirt,  Hanes, T...\n",
-      "4    Hancock Men Blue Regular Fit Striped Formal Sh...\n",
-      "Name: DESCRIPTION, dtype: object\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(text_total.head())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:17: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
-      "  from collections import Mapping, defaultdict\n"
-     ]
-    }
-   ],
-   "source": [
-    "import re\n",
-    "from nltk.corpus import stopwords\n",
-    "import pandas as pd\n",
-    "from nltk.stem import PorterStemmer\n",
-    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
-    "\n",
-    "def review_to_words(raw_text):\n",
-    "    \"\"\"Normalise one text into a space-separated string of stemmed tokens.\n",
-    "\n",
-    "    Steps: replace every non-letter character with a space, lower-case,\n",
-    "    split on whitespace, drop NLTK English stopwords, Porter-stem the rest\n",
-    "    and join the stems with single spaces.\n",
-    "\n",
-    "    Args:\n",
-    "        raw_text: the text to clean (must be a str; callers coerce beforehand).\n",
-    "\n",
-    "    Returns:\n",
-    "        A single string of stemmed tokens (not a list, despite the old local name).\n",
-    "    \"\"\"\n",
-    "    # Anything outside a-z / A-Z acts as a separator before tokenising.\n",
-    "    tokens = re.sub(\"[^a-zA-Z]\", \" \", raw_text).lower().split()\n",
-    "\n",
-    "    english_stopwords = set(stopwords.words(\"english\"))\n",
-    "    stemmer = PorterStemmer()\n",
-    "\n",
-    "    stems = []\n",
-    "    for token in tokens:\n",
-    "        if token in english_stopwords:\n",
-    "            continue\n",
-    "        stems.append(stemmer.stem(token))\n",
-    "\n",
-    "    return \" \".join(stems)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['fort collin men red solid pad jacket fort collin jacket topwear apparel apparel men buy fort collin men red solid pad jacket onlin india buy jacket best price', 'mango man men navi blue tailor slim fit solid formal trouser mango man trouser bottomwear apparel apparel men buy mango man men navi blue tailor slim fit solid formal trouser onlin india buy trouser best price', 'arrow men navi blue taper fit check formal trouser arrow trouser bottomwear apparel apparel men buy arrow men navi blue taper fit check formal trouser onlin india buy trouser best price', 'hane charcoal grey thermal shirt hane thermal top innerwear apparel apparel men buy hane charcoal grey thermal shirt onlin india buy thermal top best price', 'hancock men blue regular fit stripe formal shirt hancock shirt topwear apparel apparel men buy hancock men blue regular fit stripe formal shirt onlin india buy shirt best price']\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Clean every product description held in `text_total`.\n",
-    "# str() coerces any non-string entry before it reaches review_to_words.\n",
-    "processed_wmn = [review_to_words(str(text)) for text in text_total]\n",
-    "print(processed_wmn[:5])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "processed_words = \" \".join(processed_wmn).split()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def transform_sentences_to_pair_of_words(sentences):\n",
-    "    \"\"\"Return every pair of adjacent items of `sentences`, in order.\n",
-    "\n",
-    "    For ['a', 'b', 'c'] the result is [('a', 'b'), ('b', 'c')]. Inputs with\n",
-    "    fewer than two items yield an empty list.\n",
-    "\n",
-    "    Args:\n",
-    "        sentences: indexable sequence of tokens (the notebook passes a flat\n",
-    "            list of words).\n",
-    "\n",
-    "    Returns:\n",
-    "        list of (previous_item, item) tuples with max(len(sentences) - 1, 0) entries.\n",
-    "    \"\"\"\n",
-    "    # Start at index 1 so the wrap-around pair (sentences[-1], sentences[0]) is\n",
-    "    # never built; building it and deleting element 0 afterwards raised\n",
-    "    # IndexError for an empty input.\n",
-    "    return [(sentences[i - 1], sentences[i]) for i in range(1, len(sentences))]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sentences_to_pair = transform_sentences_to_pair_of_words(processed_words)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'red'),\n",
-       " ('red', 'solid'),\n",
-       " ('solid', 'pad'),\n",
-       " ('pad', 'jacket'),\n",
-       " ('jacket', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'jacket'),\n",
-       " ('jacket', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'red'),\n",
-       " ('red', 'solid'),\n",
-       " ('solid', 'pad'),\n",
-       " ('pad', 'jacket'),\n",
-       " ('jacket', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jacket'),\n",
-       " ('jacket', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'mango'),\n",
-       " ('mango', 'man'),\n",
-       " ('man', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'tailor'),\n",
-       " ('tailor', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'mango'),\n",
-       " ('mango', 'man'),\n",
-       " ('man', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'mango'),\n",
-       " ('mango', 'man'),\n",
-       " ('man', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'tailor'),\n",
-       " ('tailor', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'arrow'),\n",
-       " ('arrow', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'taper'),\n",
-       " ('taper', 'fit'),\n",
-       " ('fit', 'check'),\n",
-       " ('check', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'arrow'),\n",
-       " ('arrow', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'arrow'),\n",
-       " ('arrow', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'taper'),\n",
-       " ('taper', 'fit'),\n",
-       " ('fit', 'check'),\n",
-       " ('check', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'hane'),\n",
-       " ('hane', 'charcoal'),\n",
-       " ('charcoal', 'grey'),\n",
-       " ('grey', 'thermal'),\n",
-       " ('thermal', 'shirt'),\n",
-       " ('shirt', 'hane'),\n",
-       " ('hane', 'thermal'),\n",
-       " ('thermal', 'top'),\n",
-       " ('top', 'innerwear'),\n",
-       " ('innerwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'hane'),\n",
-       " ('hane', 'charcoal'),\n",
-       " ('charcoal', 'grey'),\n",
-       " ('grey', 'thermal'),\n",
-       " ('thermal', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'thermal'),\n",
-       " ('thermal', 'top'),\n",
-       " ('top', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'hancock'),\n",
-       " ('hancock', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'stripe'),\n",
-       " ('stripe', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'hancock'),\n",
-       " ('hancock', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'hancock'),\n",
-       " ('hancock', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'stripe'),\n",
-       " ('stripe', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'tantra'),\n",
-       " ('tantra', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'tantra'),\n",
-       " ('tantra', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'tantra'),\n",
-       " ('tantra', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'aeropostal'),\n",
-       " ('aeropostal', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'mildli'),\n",
-       " ('mildli', 'distress'),\n",
-       " ('distress', 'jean'),\n",
-       " ('jean', 'aeropostal'),\n",
-       " ('aeropostal', 'jean'),\n",
-       " ('jean', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'aeropostal'),\n",
-       " ('aeropostal', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'mildli'),\n",
-       " ('mildli', 'distress'),\n",
-       " ('distress', 'jean'),\n",
-       " ('jean', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jean'),\n",
-       " ('jean', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'ether'),\n",
-       " ('ether', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'anti'),\n",
-       " ('anti', 'microbi'),\n",
-       " ('microbi', 'cotton'),\n",
-       " ('cotton', 'linen'),\n",
-       " ('linen', 'shirt'),\n",
-       " ('shirt', 'ether'),\n",
-       " ('ether', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'ether'),\n",
-       " ('ether', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'anti'),\n",
-       " ('anti', 'microbi'),\n",
-       " ('microbi', 'cotton'),\n",
-       " ('cotton', 'linen'),\n",
-       " ('linen', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'roadster'),\n",
-       " ('roadster', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'jean'),\n",
-       " ('jean', 'roadster'),\n",
-       " ('roadster', 'jean'),\n",
-       " ('jean', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'roadster'),\n",
-       " ('roadster', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'jean'),\n",
-       " ('jean', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jean'),\n",
-       " ('jean', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'dollar'),\n",
-       " ('dollar', 'bigboss'),\n",
-       " ('bigboss', 'pack'),\n",
-       " ('pack', 'trunk'),\n",
-       " ('trunk', 'mdtr'),\n",
-       " ('mdtr', 'po'),\n",
-       " ('po', 'dollar'),\n",
-       " ('dollar', 'bigboss'),\n",
-       " ('bigboss', 'trunk'),\n",
-       " ('trunk', 'innerwear'),\n",
-       " ('innerwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'dollar'),\n",
-       " ('dollar', 'bigboss'),\n",
-       " ('bigboss', 'pack'),\n",
-       " ('pack', 'trunk'),\n",
-       " ('trunk', 'mdtr'),\n",
-       " ('mdtr', 'po'),\n",
-       " ('po', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trunk'),\n",
-       " ('trunk', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'print'),\n",
-       " ('print', 'polo'),\n",
-       " ('polo', 'collar'),\n",
-       " ('collar', 'shirt'),\n",
-       " ('shirt', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'print'),\n",
-       " ('print', 'polo'),\n",
-       " ('polo', 'collar'),\n",
-       " ('collar', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'self'),\n",
-       " ('self', 'design'),\n",
-       " ('design', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'self'),\n",
-       " ('self', 'design'),\n",
-       " ('design', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'light'),\n",
-       " ('light', 'blue'),\n",
-       " ('blue', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'skinni'),\n",
-       " ('skinni', 'fit'),\n",
-       " ('fit', 'jean'),\n",
-       " ('jean', 'calvin'),\n",
-       " ('calvin', 'klein'),\n",
-       " ('klein', 'jean'),\n",
-       " ('jean', 'jean'),\n",
-       " ('jean', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'light'),\n",
-       " ('light', 'blue'),\n",
-       " ('blue', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'skinni'),\n",
-       " ('skinni', 'fit'),\n",
-       " ('fit', 'jean'),\n",
-       " ('jean', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jean'),\n",
-       " ('jean', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'highland'),\n",
-       " ('highland', 'men'),\n",
-       " ('men', 'oliv'),\n",
-       " ('oliv', 'green'),\n",
-       " ('green', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'camouflag'),\n",
-       " ('camouflag', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'highland'),\n",
-       " ('highland', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'highland'),\n",
-       " ('highland', 'men'),\n",
-       " ('men', 'oliv'),\n",
-       " ('oliv', 'green'),\n",
-       " ('green', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'camouflag'),\n",
-       " ('camouflag', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'u'),\n",
-       " ('u', 'polo'),\n",
-       " ('polo', 'assn'),\n",
-       " ('assn', 'denim'),\n",
-       " ('denim', 'co'),\n",
-       " ('co', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'u'),\n",
-       " ('u', 'polo'),\n",
-       " ('polo', 'assn'),\n",
-       " ('assn', 'denim'),\n",
-       " ('denim', 'co'),\n",
-       " ('co', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'u'),\n",
-       " ('u', 'polo'),\n",
-       " ('polo', 'assn'),\n",
-       " ('assn', 'denim'),\n",
-       " ('denim', 'co'),\n",
-       " ('co', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'levi'),\n",
-       " ('levi', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'levi'),\n",
-       " ('levi', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'levi'),\n",
-       " ('levi', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'casual'),\n",
-       " ('casual', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'sport'),\n",
-       " ('sport', 'men'),\n",
-       " ('men', 'charcoal'),\n",
-       " ('charcoal', 'grey'),\n",
-       " ('grey', 'solid'),\n",
-       " ('solid', 'tailor'),\n",
-       " ('tailor', 'jacket'),\n",
-       " ('jacket', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'sport'),\n",
-       " ('sport', 'jacket'),\n",
-       " ('jacket', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'loui'),\n",
-       " ('loui', 'philipp'),\n",
-       " ('philipp', 'sport'),\n",
-       " ('sport', 'men'),\n",
-       " ('men', 'charcoal'),\n",
-       " ('charcoal', 'grey'),\n",
-       " ('grey', 'solid'),\n",
-       " ('solid', 'tailor'),\n",
-       " ('tailor', 'jacket'),\n",
-       " ('jacket', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jacket'),\n",
-       " ('jacket', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'killer'),\n",
-       " ('killer', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'jean'),\n",
-       " ('jean', 'killer'),\n",
-       " ('killer', 'jean'),\n",
-       " ('jean', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'killer'),\n",
-       " ('killer', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'jean'),\n",
-       " ('jean', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jean'),\n",
-       " ('jean', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'peter'),\n",
-       " ('peter', 'england'),\n",
-       " ('england', 'casual'),\n",
-       " ('casual', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'regular'),\n",
-       " ('regular', 'trouser'),\n",
-       " ('trouser', 'peter'),\n",
-       " ('peter', 'england'),\n",
-       " ('england', 'casual'),\n",
-       " ('casual', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'peter'),\n",
-       " ('peter', 'england'),\n",
-       " ('england', 'casual'),\n",
-       " ('casual', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'regular'),\n",
-       " ('regular', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'arrow'),\n",
-       " ('arrow', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'taper'),\n",
-       " ('taper', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'arrow'),\n",
-       " ('arrow', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'arrow'),\n",
-       " ('arrow', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'taper'),\n",
-       " ('taper', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'v'),\n",
-       " ('v', 'dot'),\n",
-       " ('dot', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'self'),\n",
-       " ('self', 'design'),\n",
-       " ('design', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'v'),\n",
-       " ('v', 'dot'),\n",
-       " ('dot', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'v'),\n",
-       " ('v', 'dot'),\n",
-       " ('dot', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'self'),\n",
-       " ('self', 'design'),\n",
-       " ('design', 'formal'),\n",
-       " ('formal', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'gespo'),\n",
-       " ('gespo', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'gespo'),\n",
-       " ('gespo', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'gespo'),\n",
-       " ('gespo', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'smag'),\n",
-       " ('smag', 'men'),\n",
-       " ('men', 'mustard'),\n",
-       " ('mustard', 'solid'),\n",
-       " ('solid', 'lightweight'),\n",
-       " ('lightweight', 'tailor'),\n",
-       " ('tailor', 'jacket'),\n",
-       " ('jacket', 'smag'),\n",
-       " ('smag', 'jacket'),\n",
-       " ('jacket', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'smag'),\n",
-       " ('smag', 'men'),\n",
-       " ('men', 'mustard'),\n",
-       " ('mustard', 'solid'),\n",
-       " ('solid', 'lightweight'),\n",
-       " ('lightweight', 'tailor'),\n",
-       " ('tailor', 'jacket'),\n",
-       " ('jacket', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jacket'),\n",
-       " ('jacket', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'jack'),\n",
-       " ('jack', 'jone'),\n",
-       " ('jone', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'regular'),\n",
-       " ('regular', 'trouser'),\n",
-       " ('trouser', 'jack'),\n",
-       " ('jack', 'jone'),\n",
-       " ('jone', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'jack'),\n",
-       " ('jack', 'jone'),\n",
-       " ('jone', 'men'),\n",
-       " ('men', 'black'),\n",
-       " ('black', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'regular'),\n",
-       " ('regular', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'van'),\n",
-       " ('van', 'heusen'),\n",
-       " ('heusen', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'van'),\n",
-       " ('van', 'heusen'),\n",
-       " ('heusen', 'shirt'),\n",
-       " ('shirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'van'),\n",
-       " ('van', 'heusen'),\n",
-       " ('heusen', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'regular'),\n",
-       " ('regular', 'fit'),\n",
-       " ('fit', 'solid'),\n",
-       " ('solid', 'formal'),\n",
-       " ('formal', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'shirt'),\n",
-       " ('shirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'maniac'),\n",
-       " ('maniac', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'solid'),\n",
-       " ('solid', 'v'),\n",
-       " ('v', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'maniac'),\n",
-       " ('maniac', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'maniac'),\n",
-       " ('maniac', 'men'),\n",
-       " ('men', 'grey'),\n",
-       " ('grey', 'solid'),\n",
-       " ('solid', 'v'),\n",
-       " ('v', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'stretchabl'),\n",
-       " ('stretchabl', 'crop'),\n",
-       " ('crop', 'jean'),\n",
-       " ('jean', 'jean'),\n",
-       " ('jean', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'men'),\n",
-       " ('men', 'blue'),\n",
-       " ('blue', 'slim'),\n",
-       " ('slim', 'fit'),\n",
-       " ('fit', 'mid'),\n",
-       " ('mid', 'rise'),\n",
-       " ('rise', 'clean'),\n",
-       " ('clean', 'look'),\n",
-       " ('look', 'stretchabl'),\n",
-       " ('stretchabl', 'crop'),\n",
-       " ('crop', 'jean'),\n",
-       " ('jean', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jean'),\n",
-       " ('jean', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'blackberri'),\n",
-       " ('blackberri', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'trouser'),\n",
-       " ('trouser', 'blackberri'),\n",
-       " ('blackberri', 'trouser'),\n",
-       " ('trouser', 'bottomwear'),\n",
-       " ('bottomwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'blackberri'),\n",
-       " ('blackberri', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'blue'),\n",
-       " ('blue', 'print'),\n",
-       " ('print', 'casual'),\n",
-       " ('casual', 'trouser'),\n",
-       " ('trouser', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'trouser'),\n",
-       " ('trouser', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'longlin'),\n",
-       " ('longlin', 'shirt'),\n",
-       " ('shirt', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'moda'),\n",
-       " ('moda', 'rapido'),\n",
-       " ('rapido', 'men'),\n",
-       " ('men', 'white'),\n",
-       " ('white', 'print'),\n",
-       " ('print', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'longlin'),\n",
-       " ('longlin', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'tan'),\n",
-       " ('tan', 'brown'),\n",
-       " ('brown', 'solid'),\n",
-       " ('solid', 'biker'),\n",
-       " ('biker', 'jacket'),\n",
-       " ('jacket', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'jacket'),\n",
-       " ('jacket', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'tan'),\n",
-       " ('tan', 'brown'),\n",
-       " ('brown', 'solid'),\n",
-       " ('solid', 'biker'),\n",
-       " ('biker', 'jacket'),\n",
-       " ('jacket', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jacket'),\n",
-       " ('jacket', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'rust'),\n",
-       " ('rust', 'brown'),\n",
-       " ('brown', 'solid'),\n",
-       " ('solid', 'biker'),\n",
-       " ('biker', 'jacket'),\n",
-       " ('jacket', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'jacket'),\n",
-       " ('jacket', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'fort'),\n",
-       " ('fort', 'collin'),\n",
-       " ('collin', 'men'),\n",
-       " ('men', 'rust'),\n",
-       " ('rust', 'brown'),\n",
-       " ('brown', 'solid'),\n",
-       " ('solid', 'biker'),\n",
-       " ('biker', 'jacket'),\n",
-       " ('jacket', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'jacket'),\n",
-       " ('jacket', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'esprit'),\n",
-       " ('esprit', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'white'),\n",
-       " ('white', 'stripe'),\n",
-       " ('stripe', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'esprit'),\n",
-       " ('esprit', 'tshirt'),\n",
-       " ('tshirt', 'topwear'),\n",
-       " ('topwear', 'apparel'),\n",
-       " ('apparel', 'apparel'),\n",
-       " ('apparel', 'men'),\n",
-       " ('men', 'buy'),\n",
-       " ('buy', 'esprit'),\n",
-       " ('esprit', 'men'),\n",
-       " ('men', 'navi'),\n",
-       " ('navi', 'white'),\n",
-       " ('white', 'stripe'),\n",
-       " ('stripe', 'round'),\n",
-       " ('round', 'neck'),\n",
-       " ('neck', 'shirt'),\n",
-       " ('shirt', 'onlin'),\n",
-       " ('onlin', 'india'),\n",
-       " ('india', 'buy'),\n",
-       " ('buy', 'tshirt'),\n",
-       " ('tshirt', 'best'),\n",
-       " ('best', 'price'),\n",
-       " ('price', 'u'),\n",
-       " ('u', 'polo'),\n",
-       " ('polo', 'assn'),\n",
-       " ('assn', 'men'),\n",
-       " ('men', 'oliv'),\n",
-       " ('oliv', 'green'),\n",
-       " ('green', 'regular'),\n",
-       " ...]"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "sentences_to_pair"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import networkx as nx\n",
-    "\n",
-    "# Every (token, next_token) pair becomes one edge; repeated pairs collapse into a single edge.\n",
-    "# Build the undirected graph directly instead of creating a DiGraph and calling\n",
-    "# to_undirected(), which deep-copies the whole graph just to drop edge direction.\n",
-    "G = nx.Graph()\n",
-    "G.add_edges_from(sentences_to_pair)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'networkx.classes.graph.Graph'>\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(type(G))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Computing transition probabilities: 100%|██████████| 1508/1508 [00:08<00:00, 183.21it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model Saved\n"
-     ]
-    }
-   ],
-   "source": [
-    "from node2vec import Node2Vec\n",
-    "\n",
-    "# Precompute the walk transition probabilities on G, then fit embeddings on the sampled walks.\n",
-    "node2vec = Node2Vec(G, dimensions=20, walk_length=16, num_walks=100, workers=2)\n",
-    "model = node2vec.fit(window=10, min_count=1)\n",
-    "# Nothing is written to disk in this cell, so report training rather than saving.\n",
-    "print('Model trained')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
-      "  \"\"\"Entry point for launching an IPython kernel.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "assassin\n",
-      "junior\n",
-      "retro\n",
-      "rubber\n",
-      "digit\n",
-      "transpar\n",
-      "caffein\n",
-      "collect\n",
-      "creed\n"
-     ]
-    }
-   ],
-   "source": [
-    "# nearest neighbours of 'black'; tokens of 3 characters or fewer are skipped\n",
-    "for node, _ in model.wv.most_similar('black'):\n",
-    "    if len(node) > 3:\n",
-    "        print(node)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
-      "  \"\"\"Entry point for launching an IPython kernel.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tshirt\n",
-      "signatur\n",
-      "stardust\n",
-      "homm\n",
-      "price\n",
-      "tantra\n",
-      "naresh\n",
-      "statement\n"
-     ]
-    }
-   ],
-   "source": [
-    "# nearest neighbours of 'men'; tokens of 3 characters or fewer are skipped\n",
-    "for node, _ in model.wv.most_similar('men'):\n",
-    "    if len(node) > 3:\n",
-    "        print(node)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
-      "  \"\"\"Entry point for launching an IPython kernel.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "price\n",
-      "ivoc\n",
-      "guess\n",
-      "excalibur\n",
-      "dazzio\n",
-      "oxolloxo\n",
-      "dilling\n",
-      "rasm\n",
-      "dennison\n"
-     ]
-    }
-   ],
-   "source": [
-    "# nearest neighbours of 'buy'; tokens of 3 characters or fewer are skipped\n",
-    "for node, _ in model.wv.most_similar('buy'):\n",
-    "    if len(node) > 3:\n",
-    "        print(node)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "bottomwear\n",
-      "loungewear\n",
-      "enfield\n",
-      "apparel\n",
-      "royal\n",
-      "mumf\n",
-      "alvaro\n",
-      "underwear\n",
-      "robe\n"
-     ]
-    }
-   ],
-   "source": [
-    "w1 = \"best\"\n",
-    "for node, _ in model.wv.most_similar(positive=w1, topn=10):\n",
-    "    # ignore tokens of 3 characters or fewer\n",
-    "    if len(node) <= 3:\n",
-    "        continue\n",
-    "    print(node)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py:877: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
-      "  vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "black\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(model.wv.doesnt_match(\"men black jeans\".split()))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[('pro', 1.9804141521453857),\n",
-       " ('nsw', 1.8536335229873657),\n",
-       " ('brt', 1.6897633075714111),\n",
-       " ('thoroughbr', 1.641608715057373),\n",
-       " ('fcb', 1.6144654750823975),\n",
-       " ('scoop', 1.5819679498672485),\n",
-       " ('cncpt', 1.5131385326385498),\n",
-       " ('spanish', 1.4589388370513916),\n",
-       " ('caffein', 1.4484915733337402),\n",
-       " ('stad', 1.440345287322998)]"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.most_similar_cosmul(positive=['woman', 'black'], negative=['man'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.99999994"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.similarity(w1=\"black\", w2=\"black\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.33650184"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.similarity(w1=\"black\", w2=\"brown\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'white'"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.doesnt_match([\"brown\", \"white\", \"black\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.32085222"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.similarity(w1=\"black\", w2=\"white\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.27853268"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.wv.similarity(w1=\"white\", w2=\"brown\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).\n",
-      "  \"\"\"Entry point for launching an IPython kernel.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "bottomwear\n",
-      "loungewear\n",
-      "enfield\n",
-      "apparel\n",
-      "royal\n",
-      "mumf\n",
-      "alvaro\n",
-      "underwear\n",
-      "robe\n"
-     ]
-    }
-   ],
-   "source": [
-    "# nearest neighbours of 'best'; tokens of 3 characters or fewer are skipped\n",
-    "for node, _ in model.wv.most_similar('best'):\n",
-    "    if len(node) > 3:\n",
-    "        print(node)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "8185"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "descriptions = data1[\"DESCRIPTION\"].unique().tolist()\n",
-    "len(descriptions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import random\n",
-    "\n",
-    "# shuffle the unique descriptions in place with a fixed seed so that the\n",
-    "# train/validation split is the same on every Restart & Run All\n",
-    "random.Random(42).shuffle(descriptions)\n",
-    "\n",
-    "# the first 90% of the shuffled descriptions form the training set\n",
-    "n_train = round(0.9 * len(descriptions))\n",
-    "descriptions_train = descriptions[:n_train]\n",
-    "\n",
-    "# rows whose description is in the training set go to train_df, all others to validation_df\n",
-    "is_train = data1['DESCRIPTION'].isin(descriptions_train)\n",
-    "train_df = data1[is_train]\n",
-    "validation_df = data1[~is_train]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 7366/7366 [00:16<00:00, 449.58it/s]\n"
-     ]
-    }
-   ],
-   "source": [
-    "from tqdm import tqdm\n",
-    "\n",
-    "# collect the CATEGORY values of every description in one groupby pass\n",
-    "# instead of re-filtering the whole frame once per description\n",
-    "train_categories_by_description = train_df.groupby(\"DESCRIPTION\")[\"CATEGORY\"].apply(list).to_dict()\n",
-    "\n",
-    "# products_train[k] holds the categories of descriptions_train[k];\n",
-    "# a description without matching rows gets an empty list\n",
-    "products_train = [train_categories_by_description.get(i, []) for i in tqdm(descriptions_train)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████| 819/819 [00:00<00:00, 893.95it/s]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# collect the CATEGORY values of every validation description in one groupby pass\n",
-    "# instead of re-filtering the whole frame once per description\n",
-    "val_categories_by_description = validation_df.groupby(\"DESCRIPTION\")[\"CATEGORY\"].apply(list).to_dict()\n",
-    "\n",
-    "# one list per unique validation description, in order of first appearance;\n",
-    "# a description without matching rows gets an empty list\n",
-    "products_val = [val_categories_by_description.get(i, []) for i in tqdm(validation_df['DESCRIPTION'].unique())]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame\n",
-      "\n",
-      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  after removing the cwd from sys.path.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# keep one (CATEGORY, NAME) row per category: the last one found in data1.\n",
-    "# Chaining on the selection avoids the SettingWithCopyWarning that\n",
-    "# drop_duplicates(inplace=True) raises when applied to a slice of data1.\n",
-    "products = data1[[\"CATEGORY\", \"NAME\"]].drop_duplicates(subset='CATEGORY', keep=\"last\")\n",
-    "\n",
-    "# map every category to a one-element list holding the name kept above\n",
-    "products_dict = products.groupby('CATEGORY')['NAME'].apply(list).to_dict()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['Navy Blue Checked Casual Jacket']"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# test the dictionary\n",
-    "products_dict['Men Jackets Coats']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['Wills Lifestyle Men Grey Regular Fit Woollen Solid Formal Trousers']"
-      ]
-     },
-     "execution_count": 50,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "products_dict['Men Formal Trousers']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def similar_products(v, n = 6):\n",
-    "    \"\"\"Return (label, similarity) pairs for the model nodes most similar to ``v``.\n",
-    "\n",
-    "    v: 1-D embedding vector, e.g. ``model.wv[node]``.\n",
-    "    n: number of neighbours to return.\n",
-    "\n",
-    "    The label is the first name stored in ``products_dict`` for the node. When the\n",
-    "    node has no entry there, the node itself is used as the label instead of\n",
-    "    raising KeyError.\n",
-    "\n",
-    "    Raises ValueError when ``v`` is not a 1-D vector (for example the scalar NaN\n",
-    "    produced by averaging an empty list of vectors).\n",
-    "    \"\"\"\n",
-    "    if np.ndim(v) != 1:\n",
-    "        raise ValueError('v must be a 1-D embedding vector')\n",
-    "\n",
-    "    # ask for n+1 hits and drop the best one -- presumably the query node itself\n",
-    "    # when v is the embedding of a vocabulary node (TODO confirm for averaged vectors)\n",
-    "    ms = model.wv.similar_by_vector(v, topn=n + 1)[1:]\n",
-    "\n",
-    "    # pair each neighbour's label with its similarity score\n",
-    "    new_ms = []\n",
-    "    for node, score in ms:\n",
-    "        names = products_dict.get(node)\n",
-    "        label = names[0] if names else node\n",
-    "        new_ms.append((label, score))\n",
-    "\n",
-    "    return new_ms"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
-      "  \"\"\"Entry point for launching an IPython kernel.\n",
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: DeprecationWarning: Call to deprecated `similar_by_vector` (Method will be removed in 4.0.0, use self.wv.similar_by_vector() instead).\n",
-      "  after removing the cwd from sys.path.\n"
-     ]
-    },
-    {
-     "ename": "KeyError",
-     "evalue": "'smag'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-44-b12c7e3e20d0>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msimilar_products\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'fort'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;32m<ipython-input-42-19f474f97eb7>\u001b[0m in \u001b[0;36msimilar_products\u001b[1;34m(v, n)\u001b[0m\n\u001b[0;32m      7\u001b[0m     \u001b[0mnew_ms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mms\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m         \u001b[0mpair\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mproducts_dict\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mj\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mj\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     10\u001b[0m         \u001b[0mnew_ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpair\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;31mKeyError\u001b[0m: 'smag'"
-     ]
-    }
-   ],
-   "source": [
-    "similar_products(model.wv['fort'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def aggregate_vectors(products):\n",
-    "    \"\"\"Return the element-wise mean of the embeddings of the given items.\n",
-    "\n",
-    "    products: iterable of keys to look up in ``model.wv``.\n",
-    "\n",
-    "    Items that are missing from the model vocabulary are skipped.\n",
-    "    Raises ValueError when none of the items has an embedding, because the mean\n",
-    "    of an empty list is NaN and cannot be used as a query vector.\n",
-    "    \"\"\"\n",
-    "    product_vec = []\n",
-    "    for i in products:\n",
-    "        try:\n",
-    "            product_vec.append(model.wv[i])\n",
-    "        except KeyError:\n",
-    "            # not in the vocabulary: leave it out of the average\n",
-    "            continue\n",
-    "\n",
-    "    if not product_vec:\n",
-    "        raise ValueError('none of the given items is in the model vocabulary')\n",
-    "\n",
-    "    return np.mean(product_vec, axis=0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(products_val[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:5: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).\n",
-      "  \"\"\"\n",
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3118: RuntimeWarning: Mean of empty slice.\n",
-      "  out=out, **kwargs)\n",
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\numpy\\core\\_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars\n",
-      "  ret = ret.dtype.type(ret / rcount)\n",
-      "C:\\Users\\Agusti Frananda\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: DeprecationWarning: Call to deprecated `similar_by_vector` (Method will be removed in 4.0.0, use self.wv.similar_by_vector() instead).\n",
-      "  after removing the cwd from sys.path.\n"
-     ]
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "cannot unpack non-iterable numpy.float64 object",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-48-0643cd97cf20>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0msimilar_products\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maggregate_vectors\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mproducts_val\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;32m<ipython-input-42-19f474f97eb7>\u001b[0m in \u001b[0;36msimilar_products\u001b[1;34m(v, n)\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[1;31m# extract most similar products for the input vector\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0mms\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msimilar_by_vector\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m=\u001b[0m \u001b[0mn\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m     \u001b[1;31m# extract name and similarity score of the similar products\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\utils.py\u001b[0m in \u001b[0;36mnew_func1\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m   1445\u001b[0m                     \u001b[0mstacklevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1446\u001b[0m                 )\n\u001b[1;32m-> 1447\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1448\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1449\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mnew_func1\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\base_any2vec.py\u001b[0m in \u001b[0;36msimilar_by_vector\u001b[1;34m(self, vector, topn, restrict_vocab)\u001b[0m\n\u001b[0;32m   1432\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1433\u001b[0m         \"\"\"\n\u001b[1;32m-> 1434\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msimilar_by_vector\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvector\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1435\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1436\u001b[0m     \u001b[1;33m@\u001b[0m\u001b[0mdeprecated\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Method will be removed in 4.0.0, use self.wv.doesnt_match() instead\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py\u001b[0m in \u001b[0;36msimilar_by_vector\u001b[1;34m(self, vector, topn, restrict_vocab)\u001b[0m\n\u001b[0;32m    620\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    621\u001b[0m         \"\"\"\n\u001b[1;32m--> 622\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmost_similar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpositive\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mvector\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtopn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtopn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mrestrict_vocab\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    623\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    624\u001b[0m     @deprecated(\n",
-      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\gensim\\models\\keyedvectors.py\u001b[0m in \u001b[0;36mmost_similar\u001b[1;34m(self, positive, negative, topn, restrict_vocab, indexer)\u001b[0m\n\u001b[0;32m    547\u001b[0m         \u001b[1;31m# compute the weighted average of all words\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    548\u001b[0m         \u001b[0mall_words\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmean\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 549\u001b[1;33m         \u001b[1;32mfor\u001b[0m \u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mpositive\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mnegative\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    550\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mword\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mndarray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    551\u001b[0m                 \u001b[0mmean\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweight\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mword\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;31mTypeError\u001b[0m: cannot unpack non-iterable numpy.float64 object"
-     ]
-    }
-   ],
-   "source": [
-    "similar_products(aggregate_vectors(products_val[0]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}