From b14e6a202be9a9cb7f0a0b378f3eaf5c42acb641 Mon Sep 17 00:00:00 2001 From: JISHNU P <cb.en.p2aid19017@cb.students.amrita.edu> Date: Fri, 3 Jul 2020 11:50:10 +0530 Subject: [PATCH] Upload New File --- ...ure_extraction_from_segmented_images.ipynb | 1315 +++++++++++++++++ 1 file changed, 1315 insertions(+) create mode 100644 Jupyter Notebook/_VGG16_feature_extraction_from_segmented_images.ipynb diff --git a/Jupyter Notebook/_VGG16_feature_extraction_from_segmented_images.ipynb b/Jupyter Notebook/_VGG16_feature_extraction_from_segmented_images.ipynb new file mode 100644 index 0000000..19860ac --- /dev/null +++ b/Jupyter Notebook/_VGG16_feature_extraction_from_segmented_images.ipynb @@ -0,0 +1,1315 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt; \n", + " \n", + "# Importing sklearn libraries\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import confusion_matrix, accuracy_score\n", + "import hypopt\n", + "from hypopt import GridSearch\n", + " \n", + "from keras.utils import np_utils\n", + "from keras.models import Sequential\n", + "from keras.applications import VGG16\n", + "from keras.applications import imagenet_utils\n", + "from keras.callbacks import ModelCheckpoint\n", + "from keras.preprocessing.image import load_img\n", + "from keras.preprocessing.image import img_to_array\n", + "from keras.layers import Dense, Conv2D, MaxPooling2D\n", + "from keras.layers import Dropout, Flatten, GlobalAveragePooling2D\n", + " \n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "train = [os.path.join(\"D:\\cleared\",img) for img in os.listdir(\"D:\\cleared\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "437" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(train)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['D:\\\\cleared\\\\204.png',\n", + " 'D:\\\\cleared\\\\205.png',\n", + " 'D:\\\\cleared\\\\206.png',\n", + " 'D:\\\\cleared\\\\207.png',\n", + " 'D:\\\\cleared\\\\208.png']" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train[0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "#train_y = [int(img.split(\"\\\\\")[-1].split(\"_\")[0]) for img in train]" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] loading network...\n", + "Model: \"vgg16\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "input_2 (InputLayer) (None, None, None, 3) 0 \n", + "_________________________________________________________________\n", + "block1_conv1 (Conv2D) (None, None, None, 64) 1792 \n", + "_________________________________________________________________\n", + "block1_conv2 (Conv2D) (None, None, None, 64) 36928 \n", + "_________________________________________________________________\n", + "block1_pool (MaxPooling2D) (None, None, None, 64) 0 \n", + "_________________________________________________________________\n", + "block2_conv1 (Conv2D) (None, None, None, 128) 73856 \n", + "_________________________________________________________________\n", + "block2_conv2 (Conv2D) (None, None, None, 128) 147584 \n", + "_________________________________________________________________\n", + "block2_pool (MaxPooling2D) (None, None, None, 128) 0 \n", + "_________________________________________________________________\n", + "block3_conv1 (Conv2D) (None, None, None, 256) 295168 \n", + "_________________________________________________________________\n", + "block3_conv2 (Conv2D) (None, None, None, 256) 590080 \n", + "_________________________________________________________________\n", + "block3_conv3 (Conv2D) (None, None, None, 256) 590080 \n", + "_________________________________________________________________\n", + "block3_pool (MaxPooling2D) (None, None, None, 256) 0 \n", + "_________________________________________________________________\n", + "block4_conv1 (Conv2D) (None, None, None, 512) 1180160 \n", + "_________________________________________________________________\n", + "block4_conv2 (Conv2D) (None, None, None, 512) 2359808 \n", + "_________________________________________________________________\n", + "block4_conv3 (Conv2D) (None, None, None, 512) 2359808 \n", + "_________________________________________________________________\n", + "block4_pool (MaxPooling2D) (None, None, None, 512) 0 \n", + "_________________________________________________________________\n", + "block5_conv1 (Conv2D) (None, None, None, 512) 2359808 \n", + "_________________________________________________________________\n", + "block5_conv2 (Conv2D) (None, None, None, 512) 2359808 \n", + "_________________________________________________________________\n", + "block5_conv3 (Conv2D) (None, None, None, 512) 2359808 \n", + "_________________________________________________________________\n", + "block5_pool (MaxPooling2D) (None, None, None, 512) 0 \n", + "=================================================================\n", + "Total params: 14,714,688\n", + "Trainable params: 14,714,688\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "# load the VGG16 network\n", + "print(\"[INFO] loading network...\")\n", + " \n", + "# chop the top dense layers, include_top=False\n", + "model = VGG16(weights=\"imagenet\", include_top=False)\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "def create_features(dataset, pre_model):\n", + " \n", + " x_scratch = []\n", + "\n", + " for imagePath in dataset:\n", + " \n", + " image = load_img(imagePath, target_size=(224, 224))\n", + " image = img_to_array(image)\n", + "\n", + " image = np.expand_dims(image, axis=0)\n", + " image = imagenet_utils.preprocess_input(image)\n", + " \n", + " x_scratch.append(image)\n", + " \n", + " x = np.vstack(x_scratch)\n", + " features = pre_model.predict(x, batch_size=32)\n", + " features_flatten = features.reshape((features.shape[0], 7 * 7 * 512))\n", + " return x, features, features_flatten" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "train_x, train_features, train_features_flatten = create_features(train, model)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(437, 224, 224, 3) (437, 7, 7, 512) (437, 25088)\n" + ] + } + ], + "source": [ + "print(train_x.shape, train_features.shape, train_features_flatten.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "z = pd.DataFrame(train_features_flatten)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(437, 25088)" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>...</th>\n", + " <th>25078</th>\n", + " <th>25079</th>\n", + " <th>25080</th>\n", + " <th>25081</th>\n", + " <th>25082</th>\n", + " <th>25083</th>\n", + " <th>25084</th>\n", + " <th>25085</th>\n", + " <th>25086</th>\n", + " <th>25087</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>...</td>\n", + " <td>5.332700</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.897519</td>\n", + " <td>0.00000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.203241</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>...</td>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " <td>5.51048</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.478585</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>...</td>\n", + " <td>4.385857</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.342268</td>\n", + " <td>0.00000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>...</td>\n", + " <td>4.716055</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.382866</td>\n", + " <td>0.00000</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.227291</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>...</td>\n", + " <td>3.947851</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.000000</td>\n", + " <td>0.00000</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 25088 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 \\\n", + "0 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.203241 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.478585 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.227291 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " ... 25078 25079 25080 25081 25082 25083 25084 25085 25086 \\\n", + "0 ... 5.332700 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.897519 \n", + "1 ... 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 \n", + "2 ... 4.385857 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.342268 \n", + "3 ... 4.716055 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.382866 \n", + "4 ... 3.947851 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 \n", + "\n", + " 25087 \n", + "0 0.00000 \n", + "1 5.51048 \n", + "2 0.00000 \n", + "3 0.00000 \n", + "4 0.00000 \n", + "\n", + "[5 rows x 25088 columns]" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "z.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "scaler.fit(z)\n", + "scaled_data = scaler.transform(z)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(437, 25088)" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#scaled_data = scaled_data.flatten()\n", + "scaled_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.decomposition import PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "pca = PCA(n_components = 300)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PCA(copy=True, iterated_power='auto', n_components=300, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pca.fit(scaled_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "x_pca = pca.transform(scaled_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9207363059394993" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(pca.explained_variance_ratio_)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(437, 300)" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-1.0444730e+01, 4.6611047e+00, 9.8495493e+00, ...,\n", + " 1.1189542e+00, -4.3036225e-01, -5.3842467e-01],\n", + " [-3.5536179e+01, 7.7843609e+00, 4.2844013e+01, ...,\n", + " -9.7006187e-02, -4.0296584e-02, -4.5649782e-01],\n", + " [-4.6288466e+00, 1.1535113e+00, -1.8068393e+00, ...,\n", + " -1.5006965e+00, -2.1910863e+00, 7.4389362e+00],\n", + " ...,\n", + " [-7.1319375e+00, -1.7024778e+00, -3.6759336e+00, ...,\n", + " -4.7672758e+00, 3.1064281e+00, -1.3014789e-01],\n", + " [-7.9709377e+00, -2.5413103e+01, -1.4869870e+00, ...,\n", + " -2.5454619e+00, 2.8093722e-01, -7.9439054e+00],\n", + " [-1.8498642e+01, -1.3908784e+01, -2.2208200e+00, ...,\n", + " 2.4384363e+00, -5.1710737e-01, 1.0533013e+00]], dtype=float32)" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_pca" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [], + "source": [ + "k = pd.DataFrame(x_pca)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>...</th>\n", + " <th>290</th>\n", + " <th>291</th>\n", + " <th>292</th>\n", + " <th>293</th>\n", + " <th>294</th>\n", + " <th>295</th>\n", + " <th>296</th>\n", + " <th>297</th>\n", + " <th>298</th>\n", + " <th>299</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-10.444730</td>\n", + " <td>4.661105</td>\n", + " <td>9.849549</td>\n", + " <td>-0.903321</td>\n", + " <td>-10.143111</td>\n", + " <td>-13.127480</td>\n", + " <td>11.195497</td>\n", + " <td>3.299569</td>\n", + " <td>46.441799</td>\n", + " <td>2.735783</td>\n", + " <td>...</td>\n", + " <td>1.536737</td>\n", + " <td>-0.060611</td>\n", + " <td>0.323464</td>\n", + " <td>0.413137</td>\n", + " <td>-0.268665</td>\n", + " <td>-0.126839</td>\n", + " <td>-1.189388</td>\n", + " <td>1.118954</td>\n", + " <td>-0.430362</td>\n", + " <td>-0.538425</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-35.536179</td>\n", + " <td>7.784361</td>\n", + " <td>42.844013</td>\n", + " <td>2.155625</td>\n", + " <td>24.841558</td>\n", + " <td>18.057373</td>\n", + " <td>-1.330271</td>\n", + " <td>-5.816511</td>\n", + " <td>-20.640882</td>\n", + " <td>27.364361</td>\n", + " <td>...</td>\n", + " <td>0.023582</td>\n", + " <td>0.638392</td>\n", + " <td>0.640741</td>\n", + " <td>0.594092</td>\n", + " <td>-0.511576</td>\n", + " <td>-0.260805</td>\n", + " <td>-0.646197</td>\n", + " <td>-0.097006</td>\n", + " <td>-0.040297</td>\n", + " <td>-0.456498</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-4.628847</td>\n", + " <td>1.153511</td>\n", + " <td>-1.806839</td>\n", + " <td>-3.937223</td>\n", + " <td>-20.005037</td>\n", + " <td>-0.066961</td>\n", + " <td>-2.847182</td>\n", + " <td>-3.588678</td>\n", + " <td>5.466370</td>\n", + " <td>-3.760830</td>\n", + " <td>...</td>\n", + " <td>2.320066</td>\n", + " <td>3.523527</td>\n", + " <td>-0.553331</td>\n", + " <td>-6.991115</td>\n", + " <td>-2.121491</td>\n", + " <td>-10.109548</td>\n", + " <td>-0.605339</td>\n", + " <td>-1.500697</td>\n", + " <td>-2.191086</td>\n", + " <td>7.438936</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>-18.351189</td>\n", + " <td>-0.658571</td>\n", + " <td>-15.081880</td>\n", + " <td>3.658185</td>\n", + " <td>-10.691589</td>\n", + " <td>-1.246146</td>\n", + " <td>-4.018961</td>\n", + " <td>0.788919</td>\n", + " <td>8.579920</td>\n", + " <td>-3.446223</td>\n", + " <td>...</td>\n", + " <td>-8.369588</td>\n", + " <td>-1.051060</td>\n", + " <td>-1.864871</td>\n", + " <td>-2.089530</td>\n", + " <td>-2.654392</td>\n", + " <td>1.654212</td>\n", + " <td>7.841827</td>\n", + " <td>1.138777</td>\n", + " <td>-3.541554</td>\n", + " <td>1.616553</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-8.859509</td>\n", + " <td>-8.935403</td>\n", + " <td>-3.350512</td>\n", + " <td>2.252593</td>\n", + " <td>5.826995</td>\n", + " <td>10.123581</td>\n", + " <td>18.911684</td>\n", + " <td>-2.141064</td>\n", + " <td>-15.519496</td>\n", + " <td>-8.299529</td>\n", + " <td>...</td>\n", + " <td>0.647525</td>\n", + " <td>-0.061152</td>\n", + " <td>0.680764</td>\n", + " <td>-0.177417</td>\n", + " <td>0.623649</td>\n", + " <td>-0.307552</td>\n", + " <td>-0.176116</td>\n", + " <td>0.158529</td>\n", + " <td>0.265895</td>\n", + " <td>0.310474</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 300 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -10.444730 4.661105 9.849549 -0.903321 -10.143111 -13.127480 11.195497 \n", + "1 -35.536179 7.784361 42.844013 2.155625 24.841558 18.057373 -1.330271 \n", + "2 -4.628847 1.153511 -1.806839 -3.937223 -20.005037 -0.066961 -2.847182 \n", + "3 -18.351189 -0.658571 -15.081880 3.658185 -10.691589 -1.246146 -4.018961 \n", + "4 -8.859509 -8.935403 -3.350512 2.252593 5.826995 10.123581 18.911684 \n", + "\n", + " 7 8 9 ... 290 291 292 \\\n", + "0 3.299569 46.441799 2.735783 ... 1.536737 -0.060611 0.323464 \n", + "1 -5.816511 -20.640882 27.364361 ... 0.023582 0.638392 0.640741 \n", + "2 -3.588678 5.466370 -3.760830 ... 2.320066 3.523527 -0.553331 \n", + "3 0.788919 8.579920 -3.446223 ... -8.369588 -1.051060 -1.864871 \n", + "4 -2.141064 -15.519496 -8.299529 ... 0.647525 -0.061152 0.680764 \n", + "\n", + " 293 294 295 296 297 298 299 \n", + "0 0.413137 -0.268665 -0.126839 -1.189388 1.118954 -0.430362 -0.538425 \n", + "1 0.594092 -0.511576 -0.260805 -0.646197 -0.097006 -0.040297 -0.456498 \n", + "2 -6.991115 -2.121491 -10.109548 -0.605339 -1.500697 -2.191086 7.438936 \n", + "3 -2.089530 -2.654392 1.654212 7.841827 1.138777 -3.541554 1.616553 \n", + "4 -0.177417 0.623649 -0.307552 -0.176116 0.158529 0.265895 0.310474 \n", + "\n", + "[5 rows x 300 columns]" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "# k.to_csv(r'C:\\Users\\Ankit\\Desktop\\cleaned_features.csv',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "k=k.round()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " <th>...</th>\n", + " <th>290</th>\n", + " <th>291</th>\n", + " <th>292</th>\n", + " <th>293</th>\n", + " <th>294</th>\n", + " <th>295</th>\n", + " <th>296</th>\n", + " <th>297</th>\n", + " <th>298</th>\n", + " <th>299</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>-10.0</td>\n", + " <td>5.0</td>\n", + " <td>10.0</td>\n", + " <td>-1.0</td>\n", + " <td>-10.0</td>\n", + " <td>-13.0</td>\n", + " <td>11.0</td>\n", + " <td>3.0</td>\n", + " <td>46.0</td>\n", + " <td>3.0</td>\n", + " <td>...</td>\n", + " <td>2.0</td>\n", + " <td>-0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " <td>-1.0</td>\n", + " <td>1.0</td>\n", + " <td>-0.0</td>\n", + " <td>-1.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>-36.0</td>\n", + " <td>8.0</td>\n", + " <td>43.0</td>\n", + " <td>2.0</td>\n", + " <td>25.0</td>\n", + " <td>18.0</td>\n", + " <td>-1.0</td>\n", + " <td>-6.0</td>\n", + " <td>-21.0</td>\n", + " <td>27.0</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>-1.0</td>\n", + " <td>-0.0</td>\n", + " <td>-1.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>-5.0</td>\n", + " <td>1.0</td>\n", + " <td>-2.0</td>\n", + " <td>-4.0</td>\n", + " <td>-20.0</td>\n", + " <td>-0.0</td>\n", + " <td>-3.0</td>\n", + " <td>-4.0</td>\n", + " <td>5.0</td>\n", + " <td>-4.0</td>\n", + " <td>...</td>\n", + " <td>2.0</td>\n", + " <td>4.0</td>\n", + " <td>-1.0</td>\n", + " <td>-7.0</td>\n", + " <td>-2.0</td>\n", + " <td>-10.0</td>\n", + " <td>-1.0</td>\n", + " <td>-2.0</td>\n", + " <td>-2.0</td>\n", + " <td>7.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>-18.0</td>\n", + " <td>-1.0</td>\n", + " <td>-15.0</td>\n", + " <td>4.0</td>\n", + " <td>-11.0</td>\n", + " <td>-1.0</td>\n", + " <td>-4.0</td>\n", + " <td>1.0</td>\n", + " <td>9.0</td>\n", + " <td>-3.0</td>\n", + " <td>...</td>\n", + " <td>-8.0</td>\n", + " <td>-1.0</td>\n", + " <td>-2.0</td>\n", + " <td>-2.0</td>\n", + " <td>-3.0</td>\n", + " <td>2.0</td>\n", + " <td>8.0</td>\n", + " <td>1.0</td>\n", + " <td>-4.0</td>\n", + " <td>2.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>-9.0</td>\n", + " <td>-9.0</td>\n", + " <td>-3.0</td>\n", + " <td>2.0</td>\n", + " <td>6.0</td>\n", + " <td>10.0</td>\n", + " <td>19.0</td>\n", + " <td>-2.0</td>\n", + " <td>-16.0</td>\n", + " <td>-8.0</td>\n", + " <td>...</td>\n", + " <td>1.0</td>\n", + " <td>-0.0</td>\n", + " <td>1.0</td>\n", + " <td>-0.0</td>\n", + " <td>1.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>432</th>\n", + " <td>-2.0</td>\n", + " <td>-3.0</td>\n", + " <td>7.0</td>\n", + " <td>-17.0</td>\n", + " <td>-2.0</td>\n", + " <td>-2.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " <td>4.0</td>\n", + " <td>-0.0</td>\n", + " <td>...</td>\n", + " <td>10.0</td>\n", + " <td>19.0</td>\n", + " <td>-9.0</td>\n", + " <td>-9.0</td>\n", + " <td>1.0</td>\n", + " <td>7.0</td>\n", + " <td>-4.0</td>\n", + " <td>8.0</td>\n", + " <td>-1.0</td>\n", + " <td>-9.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>433</th>\n", + " <td>9.0</td>\n", + " <td>-5.0</td>\n", + " <td>5.0</td>\n", + " <td>-16.0</td>\n", + " <td>6.0</td>\n", + " <td>-4.0</td>\n", + " <td>-2.0</td>\n", + " <td>7.0</td>\n", + " <td>4.0</td>\n", + " <td>-1.0</td>\n", + " <td>...</td>\n", + " <td>0.0</td>\n", + " <td>-0.0</td>\n", + " <td>-0.0</td>\n", + " <td>-1.0</td>\n", + " <td>1.0</td>\n", + " <td>-1.0</td>\n", + " <td>-3.0</td>\n", + " <td>2.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>434</th>\n", + " <td>-7.0</td>\n", + " <td>-2.0</td>\n", + " <td>-4.0</td>\n", + " <td>-0.0</td>\n", + " <td>13.0</td>\n", + " <td>-8.0</td>\n", + " <td>-15.0</td>\n", + " <td>6.0</td>\n", + " <td>0.0</td>\n", + " <td>-3.0</td>\n", + " <td>...</td>\n", + " <td>2.0</td>\n", + " <td>-4.0</td>\n", + " <td>1.0</td>\n", + " <td>-4.0</td>\n", + " <td>-1.0</td>\n", + " <td>1.0</td>\n", + " <td>2.0</td>\n", + " <td>-5.0</td>\n", + " <td>3.0</td>\n", + " <td>-0.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>435</th>\n", + " <td>-8.0</td>\n", + " <td>-25.0</td>\n", + " <td>-1.0</td>\n", + " <td>8.0</td>\n", + " <td>0.0</td>\n", + " <td>5.0</td>\n", + " <td>18.0</td>\n", + " <td>-6.0</td>\n", + " <td>-9.0</td>\n", + " <td>-8.0</td>\n", + " <td>...</td>\n", + " <td>-2.0</td>\n", + " <td>-1.0</td>\n", + " <td>3.0</td>\n", + " <td>0.0</td>\n", + " <td>-6.0</td>\n", + " <td>0.0</td>\n", + " <td>6.0</td>\n", + " <td>-3.0</td>\n", + " <td>0.0</td>\n", + " <td>-8.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>436</th>\n", + " <td>-18.0</td>\n", + " <td>-14.0</td>\n", + " <td>-2.0</td>\n", + " <td>6.0</td>\n", + " <td>3.0</td>\n", + " <td>2.0</td>\n", + " <td>10.0</td>\n", + " <td>-0.0</td>\n", + " <td>-3.0</td>\n", + " <td>-13.0</td>\n", + " <td>...</td>\n", + " <td>-6.0</td>\n", + " <td>2.0</td>\n", + " <td>-0.0</td>\n", + " <td>-3.0</td>\n", + " <td>-4.0</td>\n", + " <td>2.0</td>\n", + " <td>1.0</td>\n", + " <td>2.0</td>\n", + " <td>-1.0</td>\n", + " <td>1.0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>437 rows × 300 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 290 \\\n", + "0 -10.0 5.0 10.0 -1.0 -10.0 -13.0 11.0 3.0 46.0 3.0 ... 2.0 \n", + "1 -36.0 8.0 43.0 2.0 25.0 18.0 -1.0 -6.0 -21.0 27.0 ... 0.0 \n", + "2 -5.0 1.0 -2.0 -4.0 -20.0 -0.0 -3.0 -4.0 5.0 -4.0 ... 2.0 \n", + "3 -18.0 -1.0 -15.0 4.0 -11.0 -1.0 -4.0 1.0 9.0 -3.0 ... -8.0 \n", + "4 -9.0 -9.0 -3.0 2.0 6.0 10.0 19.0 -2.0 -16.0 -8.0 ... 1.0 \n", + ".. ... ... ... ... ... ... ... ... ... ... ... ... \n", + "432 -2.0 -3.0 7.0 -17.0 -2.0 -2.0 -0.0 -0.0 4.0 -0.0 ... 10.0 \n", + "433 9.0 -5.0 5.0 -16.0 6.0 -4.0 -2.0 7.0 4.0 -1.0 ... 0.0 \n", + "434 -7.0 -2.0 -4.0 -0.0 13.0 -8.0 -15.0 6.0 0.0 -3.0 ... 2.0 \n", + "435 -8.0 -25.0 -1.0 8.0 0.0 5.0 18.0 -6.0 -9.0 -8.0 ... -2.0 \n", + "436 -18.0 -14.0 -2.0 6.0 3.0 2.0 10.0 -0.0 -3.0 -13.0 ... -6.0 \n", + "\n", + " 291 292 293 294 295 296 297 298 299 \n", + "0 -0.0 0.0 0.0 -0.0 -0.0 -1.0 1.0 -0.0 -1.0 \n", + "1 1.0 1.0 1.0 -1.0 -0.0 -1.0 -0.0 -0.0 -0.0 \n", + "2 4.0 -1.0 -7.0 -2.0 -10.0 -1.0 -2.0 -2.0 7.0 \n", + "3 -1.0 -2.0 -2.0 -3.0 2.0 8.0 1.0 -4.0 2.0 \n", + "4 -0.0 1.0 -0.0 1.0 -0.0 -0.0 0.0 0.0 0.0 \n", + ".. ... ... ... ... ... ... ... ... ... \n", + "432 19.0 -9.0 -9.0 1.0 7.0 -4.0 8.0 -1.0 -9.0 \n", + "433 -0.0 -0.0 -1.0 1.0 -1.0 -3.0 2.0 0.0 0.0 \n", + "434 -4.0 1.0 -4.0 -1.0 1.0 2.0 -5.0 3.0 -0.0 \n", + "435 -1.0 3.0 0.0 -6.0 0.0 6.0 -3.0 0.0 -8.0 \n", + "436 2.0 -0.0 -3.0 -4.0 2.0 1.0 2.0 -1.0 1.0 \n", + "\n", + "[437 rows x 300 columns]" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k.apply(pd.to_numeric)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} -- GitLab