{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": { "id": "ZLtZkXujsBRi" }, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn import metrics" ] }, { "cell_type": "code", "source": [ "df = pd.read_csv(\"pima_indian.csv\")\n", "feature_col_names = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness', 'insulin', 'bmi', 'diab_pred', 'age']\n", "predicted_class_names = ['diabetes']" ], "metadata": { "id": "uuLncoHFtosw" }, "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ "df.head(10)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "id": "Ju_vl25Su1R6", "outputId": "3f016c27-8b16-4fed-b436-98ceb2965dc1" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " num_preg glucose_conc diastolic_bp thickness insulin bmi diab_pred \\\n", "0 6 148 72 35 0 33.6 0.627 \n", "1 1 85 66 29 0 26.6 0.351 \n", "2 8 183 64 0 0 23.3 0.672 \n", "3 1 89 66 23 94 28.1 0.167 \n", "4 0 137 40 35 168 43.1 2.288 \n", "5 5 116 74 0 0 25.6 0.201 \n", "6 3 78 50 32 88 31.0 0.248 \n", "7 10 115 0 0 0 35.3 0.134 \n", "8 2 197 70 45 543 30.5 0.158 \n", "9 8 125 96 0 0 0.0 0.232 \n", "\n", " age diabetes \n", "0 50 1 \n", "1 31 0 \n", "2 32 1 \n", "3 21 0 \n", "4 33 1 \n", "5 30 0 \n", "6 26 1 \n", "7 29 0 \n", "8 53 1 \n", "9 54 1 " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
num_pregglucose_concdiastolic_bpthicknessinsulinbmidiab_predagediabetes
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
55116740025.60.201300
637850328831.00.248261
71011500035.30.134290
82197704554330.50.158531
9812596000.00.232541
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "avg_bmi = ['bmi']\n", "\n", "z=df[avg_bmi].mean()\n", "print(z)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "RgrzQcWHyXBB", "outputId": "3d6c6ef6-d037-4b67-8bbc-1b5b9ca05f1d" }, "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "bmi 31.992578\n", "dtype: float64\n" ] } ] }, { "cell_type": "code", "source": [ "print(df[\"bmi\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Sf3yfhViyt0h", "outputId": "5a766633-95f4-48fa-ac45-a9d6fd27bcc8" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0 33.6\n", "1 26.6\n", "2 23.3\n", "3 28.1\n", "4 43.1\n", " ... \n", "763 32.9\n", "764 36.8\n", "765 26.2\n", "766 30.1\n", "767 30.4\n", "Name: bmi, Length: 768, dtype: float64\n" ] } ] }, { "cell_type": "code", "source": [ "X = df[feature_col_names].values # these are factors for the prediction\n", "y = df[predicted_class_names].values # this is what we want to predict" ], "metadata": { "id": "dSO_4AVMtucl" }, "execution_count": 10, "outputs": [] }, { "cell_type": "code", "source": [ "xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2)" ], "metadata": { "id": "MCeISkoVuD_1" }, "execution_count": 11, "outputs": [] }, { "cell_type": "code", "source": [ "print ('\\n the total number of Training Data :',ytrain.shape)\n", "print ('\\n the total number of Test Data :',ytest.shape)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fOJBnG36uKhV", "outputId": "402a0978-4a73-4c14-8eab-c1162e899821" }, "execution_count": 12, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " the total number of Training Data : (614, 1)\n", "\n", " the total number of Test Data : (154, 1)\n" ] } ] }, { "cell_type": "code", "source": [ "clf = GaussianNB().fit(xtrain,ytrain.ravel())\n", "predicted = clf.predict(xtest)\n", "predictTestData= clf.predict([[6,148,72,35,0,33.6,0.627,50]])" ], "metadata": { "id": "b4MN6ZWhuOgV" }, "execution_count": 13, "outputs": [] }, { "cell_type": "code", "source": [ "print('\\n Confusion matrix')\n", "print(metrics.confusion_matrix(ytest,predicted))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lTJ5MbouuQuc", "outputId": "109d3f8b-2f8d-49c4-c911-446f63ff7ff0" }, "execution_count": 14, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " Confusion matrix\n", "[[73 18]\n", " [23 40]]\n" ] } ] }, { "cell_type": "code", "source": [ "print('\\n Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "F39kZhRvucUi", "outputId": "2421cb78-9ea4-4619-e965-7db1d01bfd05" }, "execution_count": 15, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " Accuracy of the classifier is 0.7337662337662337\n" ] } ] }, { "cell_type": "code", "source": [ "print('\\n The value of Precision', metrics.precision_score(ytest,predicted))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LPqHOzbpuc7f", "outputId": "645c5b4f-4975-448c-8ef3-7fdfdef89645" }, "execution_count": 16, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " The value of Precision 0.6896551724137931\n" ] } ] }, { "cell_type": "code", "source": [ "print('\\n The value of Recall', metrics.recall_score(ytest,predicted))\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uiWbQm2xuc-4", "outputId": "69b5f612-eeb2-4dbd-d93d-f316d58c2774" }, "execution_count": 17, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " The value of Recall 0.6349206349206349\n" ] } ] }, { "cell_type": "code", "source": [ "\n", "print(\"Predicted Value for individual Test Data:\", predictTestData)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mDWfejKQuVId", "outputId": "56f2b3e7-4372-48ec-aea6-8c2de54cf6e3" }, "execution_count": 18, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Predicted Value for individual Test Data: [1]\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "EFbOs02iuxvq" }, "execution_count": 18, "outputs": [] } ] }