{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "id": "ZLtZkXujsBRi"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.naive_bayes import GaussianNB\n",
        "from sklearn import metrics"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df = pd.read_csv(\"pima_indian.csv\")\n",
        "feature_col_names = ['num_preg', 'glucose_conc', 'diastolic_bp', 'thickness', 'insulin', 'bmi', 'diab_pred', 'age']\n",
        "predicted_class_names = ['diabetes']"
      ],
      "metadata": {
        "id": "uuLncoHFtosw"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df.head(10)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 363
        },
        "id": "Ju_vl25Su1R6",
        "outputId": "3f016c27-8b16-4fed-b436-98ceb2965dc1"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   num_preg  glucose_conc  diastolic_bp  thickness  insulin   bmi  diab_pred  \\\n",
              "0         6           148            72         35        0  33.6      0.627   \n",
              "1         1            85            66         29        0  26.6      0.351   \n",
              "2         8           183            64          0        0  23.3      0.672   \n",
              "3         1            89            66         23       94  28.1      0.167   \n",
              "4         0           137            40         35      168  43.1      2.288   \n",
              "5         5           116            74          0        0  25.6      0.201   \n",
              "6         3            78            50         32       88  31.0      0.248   \n",
              "7        10           115             0          0        0  35.3      0.134   \n",
              "8         2           197            70         45      543  30.5      0.158   \n",
              "9         8           125            96          0        0   0.0      0.232   \n",
              "\n",
              "   age  diabetes  \n",
              "0   50         1  \n",
              "1   31         0  \n",
              "2   32         1  \n",
              "3   21         0  \n",
              "4   33         1  \n",
              "5   30         0  \n",
              "6   26         1  \n",
              "7   29         0  \n",
              "8   53         1  \n",
              "9   54         1  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-a39c931a-cf0d-4c66-9c97-0b3cf0d18408\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>num_preg</th>\n",
              "      <th>glucose_conc</th>\n",
              "      <th>diastolic_bp</th>\n",
              "      <th>thickness</th>\n",
              "      <th>insulin</th>\n",
              "      <th>bmi</th>\n",
              "      <th>diab_pred</th>\n",
              "      <th>age</th>\n",
              "      <th>diabetes</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>6</td>\n",
              "      <td>148</td>\n",
              "      <td>72</td>\n",
              "      <td>35</td>\n",
              "      <td>0</td>\n",
              "      <td>33.6</td>\n",
              "      <td>0.627</td>\n",
              "      <td>50</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>85</td>\n",
              "      <td>66</td>\n",
              "      <td>29</td>\n",
              "      <td>0</td>\n",
              "      <td>26.6</td>\n",
              "      <td>0.351</td>\n",
              "      <td>31</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>8</td>\n",
              "      <td>183</td>\n",
              "      <td>64</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>23.3</td>\n",
              "      <td>0.672</td>\n",
              "      <td>32</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>89</td>\n",
              "      <td>66</td>\n",
              "      <td>23</td>\n",
              "      <td>94</td>\n",
              "      <td>28.1</td>\n",
              "      <td>0.167</td>\n",
              "      <td>21</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>137</td>\n",
              "      <td>40</td>\n",
              "      <td>35</td>\n",
              "      <td>168</td>\n",
              "      <td>43.1</td>\n",
              "      <td>2.288</td>\n",
              "      <td>33</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>5</td>\n",
              "      <td>116</td>\n",
              "      <td>74</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>25.6</td>\n",
              "      <td>0.201</td>\n",
              "      <td>30</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>3</td>\n",
              "      <td>78</td>\n",
              "      <td>50</td>\n",
              "      <td>32</td>\n",
              "      <td>88</td>\n",
              "      <td>31.0</td>\n",
              "      <td>0.248</td>\n",
              "      <td>26</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>10</td>\n",
              "      <td>115</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>35.3</td>\n",
              "      <td>0.134</td>\n",
              "      <td>29</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>2</td>\n",
              "      <td>197</td>\n",
              "      <td>70</td>\n",
              "      <td>45</td>\n",
              "      <td>543</td>\n",
              "      <td>30.5</td>\n",
              "      <td>0.158</td>\n",
              "      <td>53</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>8</td>\n",
              "      <td>125</td>\n",
              "      <td>96</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>0.232</td>\n",
              "      <td>54</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a39c931a-cf0d-4c66-9c97-0b3cf0d18408')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-a39c931a-cf0d-4c66-9c97-0b3cf0d18408 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-a39c931a-cf0d-4c66-9c97-0b3cf0d18408');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "avg_bmi = ['bmi']\n",
        "\n",
        "z=df[avg_bmi].mean()\n",
        "print(z)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RgrzQcWHyXBB",
        "outputId": "3d6c6ef6-d037-4b67-8bbc-1b5b9ca05f1d"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "bmi    31.992578\n",
            "dtype: float64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(df[\"bmi\"])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Sf3yfhViyt0h",
        "outputId": "5a766633-95f4-48fa-ac45-a9d6fd27bcc8"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0      33.6\n",
            "1      26.6\n",
            "2      23.3\n",
            "3      28.1\n",
            "4      43.1\n",
            "       ... \n",
            "763    32.9\n",
            "764    36.8\n",
            "765    26.2\n",
            "766    30.1\n",
            "767    30.4\n",
            "Name: bmi, Length: 768, dtype: float64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "X = df[feature_col_names].values # these are factors for the prediction\n",
        "y = df[predicted_class_names].values # this is what we want to predict"
      ],
      "metadata": {
        "id": "dSO_4AVMtucl"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.2)"
      ],
      "metadata": {
        "id": "MCeISkoVuD_1"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print ('\\n the total number of Training Data :',ytrain.shape)\n",
        "print ('\\n the total number of Test Data :',ytest.shape)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fOJBnG36uKhV",
        "outputId": "402a0978-4a73-4c14-8eab-c1162e899821"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            " the total number of Training Data : (614, 1)\n",
            "\n",
            " the total number of Test Data : (154, 1)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "clf = GaussianNB().fit(xtrain,ytrain.ravel())\n",
        "predicted = clf.predict(xtest)\n",
        "predictTestData= clf.predict([[6,148,72,35,0,33.6,0.627,50]])"
      ],
      "metadata": {
        "id": "b4MN6ZWhuOgV"
      },
      "execution_count": 13,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print('\\n Confusion matrix')\n",
        "print(metrics.confusion_matrix(ytest,predicted))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "lTJ5MbouuQuc",
        "outputId": "109d3f8b-2f8d-49c4-c911-446f63ff7ff0"
      },
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            " Confusion matrix\n",
            "[[73 18]\n",
            " [23 40]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print('\\n Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "F39kZhRvucUi",
        "outputId": "2421cb78-9ea4-4619-e965-7db1d01bfd05"
      },
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            " Accuracy of the classifier is 0.7337662337662337\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print('\\n The value of Precision', metrics.precision_score(ytest,predicted))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LPqHOzbpuc7f",
        "outputId": "645c5b4f-4975-448c-8ef3-7fdfdef89645"
      },
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            " The value of Precision 0.6896551724137931\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print('\\n The value of Recall', metrics.recall_score(ytest,predicted))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "uiWbQm2xuc-4",
        "outputId": "69b5f612-eeb2-4dbd-d93d-f316d58c2774"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            " The value of Recall 0.6349206349206349\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "print(\"Predicted Value for individual Test Data:\", predictTestData)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mDWfejKQuVId",
        "outputId": "56f2b3e7-4372-48ec-aea6-8c2de54cf6e3"
      },
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Predicted Value for individual Test Data: [1]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "EFbOs02iuxvq"
      },
      "execution_count": 18,
      "outputs": []
    }
  ]
}