{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Computer Vision: Clasificación de Imágenes (train CNN)\n", "\n", "Referencia: https://lopezyse.medium.com/computer-vision-image-classification-using-python-913cf7156812" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'c:\\\\Users\\\\CynYDie\\\\Desktop\\\\UTN_Haedo\\\\Clases\\\\Clase14_CV'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "os.getcwd()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Explorar dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Import libraries\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from tensorflow.keras.datasets import cifar10\n", "from tensorflow.keras.utils import to_categorical" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz\n", "\u001b[1m170498071/170498071\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 0us/step\n", "x_train shape: (50000, 32, 32, 3)\n", "y_train shape: (50000, 1)\n", "x_test shape: (10000, 32, 32, 3)\n", "y_test shape: (10000, 1)\n", "Number of classes: 10\n" ] } ], "source": [ "# Load CIFAR-10 data\n", "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", "\n", "# Dataset shapes\n", "print(f\"x_train shape: {x_train.shape}\")\n", "print(f\"y_train shape: {y_train.shape}\")\n", "print(f\"x_test shape: {x_test.shape}\")\n", "print(f\"y_test shape: {y_test.shape}\")\n", "\n", "# Number of unique classes\n", "num_classes = len(np.unique(y_train))\n", "print(f\"Number of classes: {num_classes}\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Define class names for easy reference\n", "class_names = [\"airplane\", \"automobile\", \"bird\", \"cat\", \"deer\", \n", " \"dog\", \"frog\", \"horse\", \"ship\", \"truck\"]\n", "\n", "# Plot a 3x3 grid of random images from the dataset with their labels\n", "plt.figure(figsize=(5, 5))\n", "for i in range(9):\n", " index = np.random.randint(0, len(x_train))\n", " plt.subplot(3, 3, i + 1)\n", " plt.imshow(x_train[index])\n", " plt.title(class_names[y_train[index][0]])\n", " plt.axis(\"off\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Count occurrences of each class in the training set\n", "class_counts = np.bincount(y_train.flatten())\n", "\n", "# Plot the class distribution\n", "plt.figure(figsize=(10, 5))\n", "plt.bar(class_names, class_counts, color='skyblue')\n", "plt.title(\"Class Distribution in CIFAR-10 Training Set\")\n", "plt.xlabel(\"Class\")\n", "plt.ylabel(\"Number of Images\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Minimum pixel value: 0\n", "Maximum pixel value: 255\n" ] } ], "source": [ "# Check the minimum and maximum pixel values\n", "print(f\"Minimum pixel value: {x_train.min()}\")\n", "print(f\"Maximum pixel value: {x_train.max()}\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\CynYDie\\Anaconda3\\envs\\cv\\Lib\\site-packages\\keras\\src\\layers\\convolutional\\base_conv.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. 
When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m29s\u001b[0m 31ms/step - accuracy: 0.3715 - loss: 1.7314 - val_accuracy: 0.5780 - val_loss: 1.2213\n", "Epoch 2/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m40s\u001b[0m 30ms/step - accuracy: 0.5793 - loss: 1.1898 - val_accuracy: 0.6089 - val_loss: 1.1312\n", "Epoch 3/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 29ms/step - accuracy: 0.6380 - loss: 1.0301 - val_accuracy: 0.6399 - val_loss: 1.0330\n", "Epoch 4/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m25s\u001b[0m 31ms/step - accuracy: 0.6837 - loss: 0.9078 - val_accuracy: 0.6855 - val_loss: 0.9075\n", "Epoch 5/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m26s\u001b[0m 33ms/step - accuracy: 0.7129 - loss: 0.8235 - val_accuracy: 0.6709 - val_loss: 0.9588\n", "Epoch 6/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m24s\u001b[0m 31ms/step - accuracy: 0.7406 - loss: 0.7517 - val_accuracy: 0.6890 - val_loss: 0.9068\n", "Epoch 7/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m25s\u001b[0m 31ms/step - accuracy: 0.7562 - loss: 0.6966 - val_accuracy: 0.6983 - val_loss: 0.8936\n", "Epoch 8/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m30s\u001b[0m 38ms/step - accuracy: 0.7800 - loss: 0.6252 - val_accuracy: 0.7028 - val_loss: 0.8958\n", "Epoch 9/10\n", 
"\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m36s\u001b[0m 31ms/step - accuracy: 0.8004 - loss: 0.5810 - val_accuracy: 0.6968 - val_loss: 0.9217\n", "Epoch 10/10\n", "\u001b[1m782/782\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m25s\u001b[0m 32ms/step - accuracy: 0.8187 - loss: 0.5179 - val_accuracy: 0.7015 - val_loss: 0.9476\n", "\u001b[1m313/313\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 7ms/step - accuracy: 0.7080 - loss: 0.9364\n", "Test accuracy: 0.7014999985694885\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense\n", "from tensorflow.keras.datasets import cifar10\n", "from tensorflow.keras.utils import to_categorical\n", "\n", "# Load and preprocess the CIFAR-10 dataset\n", "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", "x_train, x_test = x_train / 255.0, x_test / 255.0 # Normalize pixel values\n", "\n", "# One-hot encode the labels\n", "y_train = to_categorical(y_train, 10)\n", "y_test = to_categorical(y_test, 10)\n", "\n", "# Build a CNN model\n", "model = Sequential([\n", " Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),\n", " MaxPooling2D(2, 2),\n", " Conv2D(64, (3, 3), activation='relu'),\n", " MaxPooling2D(2, 2),\n", " Flatten(),\n", " Dense(128, activation='relu'),\n", " Dense(10, activation='softmax') # 10 classes in CIFAR-10\n", "])\n", "\n", "# Compile the model\n", "model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\n", "\n", "# Train the model\n", "model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test))\n", "\n", "# Evaluate the model\n", "loss, accuracy = model.evaluate(x_test, y_test)\n", "print(f'Test accuracy: {accuracy}')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "
Model: \"sequential\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ conv2d (Conv2D)                 │ (None, 30, 30, 32)     │           896 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d (MaxPooling2D)    │ (None, 15, 15, 32)     │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_1 (Conv2D)               │ (None, 13, 13, 64)     │        18,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d_1 (MaxPooling2D)  │ (None, 6, 6, 64)       │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ flatten (Flatten)               │ (None, 2304)           │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense (Dense)                   │ (None, 128)            │       295,040 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_1 (Dense)                 │ (None, 10)             │         1,290 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m30\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m896\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m15\u001b[0m, \u001b[38;5;34m32\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m13\u001b[0m, \u001b[38;5;34m13\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m18,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2304\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m295,040\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_1 
(\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m1,290\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 947,168 (3.61 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m947,168\u001b[0m (3.61 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 315,722 (1.20 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m315,722\u001b[0m (1.20 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Optimizer params: 631,446 (2.41 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Optimizer params: \u001b[0m\u001b[38;5;34m631,446\u001b[0m (2.41 MB)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Print the layer-by-layer architecture and parameter counts\n",
 "model.summary()" ] } ],
 "metadata": { "kernelspec": { "display_name": "cv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }