diff --git a/Jupyter Notebook/Distribution_of_Head_Circumference.ipynb b/Jupyter Notebook/Distribution_of_Head_Circumference.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fb2fcab8db07c419dab187029509d6151f0b22d5 --- /dev/null +++ b/Jupyter Notebook/Distribution_of_Head_Circumference.ipynb @@ -0,0 +1,547 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "df =pd.read_csv('training_set_pixel_size_and_HC.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>filename</th>\n", + " <th>pixel size(mm)</th>\n", + " <th>head circumference (mm)</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>000_HC.png</td>\n", + " <td>0.069136</td>\n", + " <td>44.30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>001_HC.png</td>\n", + " <td>0.089659</td>\n", + " <td>56.81</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>002_HC.png</td>\n", + " <td>0.062033</td>\n", + " <td>68.75</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>003_HC.png</td>\n", + " <td>0.091291</td>\n", + " <td>69.00</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>004_HC.png</td>\n", + " <td>0.061240</td>\n", + " <td>59.81</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>994</th>\n", + " <td>801_HC.png</td>\n", + " <td>0.296851</td>\n", + " <td>329.26</td>\n", + " </tr>\n", + " <tr>\n", + " <th>995</th>\n", + " <td>802_HC.png</td>\n", + " <td>0.212267</td>\n", + " <td>330.90</td>\n", + " </tr>\n", + " <tr>\n", + " <th>996</th>\n", + " <td>803_HC.png</td>\n", + " <td>0.202743</td>\n", + " <td>331.70</td>\n", + " </tr>\n", + " <tr>\n", + " <th>997</th>\n", + " <td>804_HC.png</td>\n", + " <td>0.281090</td>\n", + " <td>316.48</td>\n", + " </tr>\n", + " <tr>\n", + " <th>998</th>\n", + " <td>805_HC.png</td>\n", + " <td>0.257475</td>\n", + " <td>330.70</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>999 rows × 3 columns</p>\n", + "</div>" + ], + "text/plain": [ + " filename pixel size(mm) head circumference (mm)\n", + "0 000_HC.png 0.069136 44.30\n", + "1 001_HC.png 0.089659 56.81\n", + "2 002_HC.png 0.062033 68.75\n", + "3 003_HC.png 0.091291 69.00\n", + "4 004_HC.png 0.061240 59.81\n", + ".. ... ... ...\n", + "994 801_HC.png 0.296851 329.26\n", + "995 802_HC.png 0.212267 330.90\n", + "996 803_HC.png 0.202743 331.70\n", + "997 804_HC.png 0.281090 316.48\n", + "998 805_HC.png 0.257475 330.70\n", + "\n", + "[999 rows x 3 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "# df[(df['category_']<300) & (df['category']>=280)] = 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "10 - 50 - 1\n", + "50 - 60 - 4\n", + "60 - 65 - 9 0\n", + "65 - 70 - 28\n", + "70 - 75 - 30\n", + "----------------------------\n", + "75 - 80 - 47\n", + "80 - 85 - 26\n", + "85 - 90 - 9 1\n", + "90 - 95 - 9\n", + "95 - 100 - 20\n", + "---------------------------\n", + "100 - 150 - 61 2\n", + "---------------------------\n", + "150 - 200 - 538 \n", + "\n", + "150 - 160 3\n", + "160 - 170 4\n", + "170 - 180 5\n", + "180 - 190 6\n", + "190 - 200 7\n", + "---------------------------\n", + "200 - 250 8\n", + "---------------------------\n", + "250 - 280 9\n", + "---------------------------\n", + "280 - 300 10\n", + "---------------------------\n", + "300 - 350 11\n", + "---------------------------\n", + "###350 is last" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "df['category'] = df['head circumference (mm)']" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "df['cat_int'] = df['category'].astype('int')" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "df['categorical_con'] = df['cat_int'].astype('category')" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "994 11\n", + "995 11\n", + "996 11\n", + "997 11\n", + "998 11\n", + "Name: categorical_con, Length: 999, dtype: category\n", + "Categories (28, int64): [0, 1, 2, 3, ..., 294, 296, 298, 299]" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['categorical_con']" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(r'updated.csv', index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>filename</th>\n", + " <th>pixel size(mm)</th>\n", + " <th>head circumference (mm)</th>\n", + " <th>category</th>\n", + " <th>cat_int</th>\n", + " <th>categorical_con</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>994</th>\n", + " <td>11</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>995</th>\n", + " <td>11</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>996</th>\n", + " <td>11</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>997</th>\n", + " <td>11</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>998</th>\n", + " <td>11</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11.0</td>\n", + " <td>11</td>\n", + " <td>11</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>999 rows × 6 columns</p>\n", + "</div>" + ], + "text/plain": [ + " filename pixel size(mm) head circumference (mm) category cat_int \\\n", + "0 0 0.0 0.0 0.0 0 \n", + "1 0 0.0 0.0 0.0 0 \n", + "2 0 0.0 0.0 0.0 0 \n", + "3 0 0.0 0.0 0.0 0 \n", + "4 0 0.0 0.0 0.0 0 \n", + ".. ... ... ... ... ... \n", + "994 11 11.0 11.0 11.0 11 \n", + "995 11 11.0 11.0 11.0 11 \n", + "996 11 11.0 11.0 11.0 11 \n", + "997 11 11.0 11.0 11.0 11 \n", + "998 11 11.0 11.0 11.0 11 \n", + "\n", + " categorical_con \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + ".. ... \n", + "994 11 \n", + "995 11 \n", + "996 11 \n", + "997 11 \n", + "998 11 \n", + "\n", + "[999 rows x 6 columns]" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}