{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Computer Vision: Clasificación de Imágenes (pre-trained)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'c:\\\\Users\\\\CynYDie\\\\Desktop\\\\UTN_Haedo\\\\Clases\\\\Clase14_CV'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "os.getcwd()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clasificar imagen con ResNet50" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 4s/step\n", "Predicted:\n", "n02124075 (Egyptian_cat): 0.38\n", "n02123159 (tiger_cat): 0.16\n", "n02123045 (tabby): 0.16\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.keras.applications import ResNet50\n", "from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions\n", "import numpy as np\n", "\n", "resnet50_model = ResNet50(weights='imagenet')\n", "\n", "img_path = 'datasets/cat.jpeg'\n", "try:\n", " img = tf.keras.utils.load_img(img_path, target_size=(224, 224))\n", "except FileNotFoundError:\n", " print(f\"Error: Image file not found at {img_path}\")\n", " exit(1)\n", "\n", "x = tf.keras.utils.img_to_array(img)\n", "x = np.expand_dims(x, axis=0)\n", "x = preprocess_input(x)\n", "\n", "preds = resnet50_model.predict(x)\n", "print('Predicted:')\n", "for label, class_id, confidence in decode_predictions(preds, top=3)[0]:\n", " print(f\"{label} ({class_id}): {confidence:.2f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clasificar imagen con VGG16" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 4s/step\n", "Predicted:\n", "n02123159 (tiger_cat): 0.29\n", "n02123045 (tabby): 0.25\n", "n02124075 (Egyptian_cat): 0.24\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.keras.applications import VGG16\n", "from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions\n", "import numpy as np\n", "\n", "vgg16_model = VGG16(weights='imagenet')\n", "\n", "img_path = 'datasets/cat.jpeg'\n", "try:\n", " img = tf.keras.utils.load_img(img_path, target_size=(224, 224))\n", "except FileNotFoundError:\n", " print(f\"Error: Image file not found at {img_path}\")\n", " exit(1)\n", "\n", "x = tf.keras.utils.img_to_array(img)\n", "x = np.expand_dims(x, axis=0)\n", "x = preprocess_input(x)\n", "\n", "preds = vgg16_model.predict(x)\n", "print('Predicted:')\n", "for label, class_id, confidence in decode_predictions(preds, top=3)[0]:\n", " print(f\"{label} ({class_id}): {confidence:.2f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clasificar imagen con distintos modelos" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step\n", "WARNING:tensorflow:5 out of the last 5 calls to .one_step_on_data_distributed at 0x0000029547C85580> triggered tf.function retracing. 
Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 3s/step\n", "WARNING:tensorflow:6 out of the last 6 calls to .one_step_on_data_distributed at 0x0000029547C86DE0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 2s/step\n", "VGG16:\n", "n02123159 (tiger_cat): 0.29\n", "n02123045 (tabby): 0.25\n", "n02124075 (Egyptian_cat): 0.24\n", "\n", "ResNet50:\n", "n02124075 (Egyptian_cat): 0.38\n", "n02123159 (tiger_cat): 0.16\n", "n02123045 (tabby): 0.16\n", "\n", "MobileNetV2:\n", "n02123159 (tiger_cat): 0.64\n", "n02123045 (tabby): 0.13\n", "n02124075 (Egyptian_cat): 0.02\n", "\n", "Comparison of top 3 results:\n", "Rank 1: VGG16=n02123159 (tiger_cat), ResNet50=n02124075 (Egyptian_cat), MobileNetV2=n02123159 (tiger_cat)\n", "Rank 2: VGG16=n02123045 (tabby), ResNet50=n02123159 (tiger_cat), MobileNetV2=n02123045 (tabby)\n", "Rank 3: VGG16=n02124075 (Egyptian_cat), ResNet50=n02123045 (tabby), MobileNetV2=n02124075 (Egyptian_cat)\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow.keras.applications import VGG16, ResNet50, MobileNetV2\n", "from tensorflow.keras.applications.vgg16 import preprocess_input as vgg16_preprocess\n", "from tensorflow.keras.applications.resnet50 import preprocess_input as resnet50_preprocess\n", "from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenetv2_preprocess\n", "from tensorflow.keras.applications.vgg16 import decode_predictions\n", "import numpy as np\n", "\n", "# Load pre-trained models\n", "vgg16_model = VGG16(weights='imagenet')\n", "resnet50_model = ResNet50(weights='imagenet')\n", "mobilenetv2_model = MobileNetV2(weights='imagenet')\n", "\n", "# Load image\n", "img_path = 'datasets/cat.jpeg'\n", "try:\n", " img = tf.keras.utils.load_img(img_path, target_size=(224, 224))\n", "except FileNotFoundError:\n", " print(f\"Error: Image file not found at {img_path}\")\n", " exit(1)\n", "\n", "# Preprocess image for each model\n", "vgg16_x = tf.keras.utils.img_to_array(img)\n", "vgg16_x = np.expand_dims(vgg16_x, axis=0)\n", "vgg16_x = vgg16_preprocess(vgg16_x)\n", "\n", "resnet50_x = tf.keras.utils.img_to_array(img)\n", "resnet50_x = np.expand_dims(resnet50_x, axis=0)\n", "resnet50_x = resnet50_preprocess(resnet50_x)\n", "\n", "mobilenetv2_x = tf.keras.utils.img_to_array(img)\n", "mobilenetv2_x = np.expand_dims(mobilenetv2_x, axis=0)\n", 
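"# Note: preprocess_input is model-specific -- mobilenet_v2 scales pixels to the [-1, 1] range,\n", "# while vgg16/resnet50 use Caffe-style BGR conversion and ImageNet mean subtraction --\n", "# so each model gets its own separately preprocessed copy of the image\n",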
"mobilenetv2_x = mobilenetv2_preprocess(mobilenetv2_x)\n", "\n", "# Make predictions\n", "vgg16_preds = vgg16_model.predict(vgg16_x)\n", "resnet50_preds = resnet50_model.predict(resnet50_x)\n", "mobilenetv2_preds = mobilenetv2_model.predict(mobilenetv2_x)\n", "\n", "# Decode predictions\n", "vgg16_decoded = decode_predictions(vgg16_preds, top=3)[0]\n", "resnet50_decoded = decode_predictions(resnet50_preds, top=3)[0]\n", "mobilenetv2_decoded = decode_predictions(mobilenetv2_preds, top=3)[0]\n", "\n", "# Print top 3 results for each model\n", "print(\"VGG16:\")\n", "for label, class_id, confidence in vgg16_decoded:\n", " print(f\"{label} ({class_id}): {confidence:.2f}\")\n", "\n", "print(\"\\nResNet50:\")\n", "for label, class_id, confidence in resnet50_decoded:\n", " print(f\"{label} ({class_id}): {confidence:.2f}\")\n", "\n", "print(\"\\nMobileNetV2:\")\n", "for label, class_id, confidence in mobilenetv2_decoded:\n", " print(f\"{label} ({class_id}): {confidence:.2f}\")\n", "\n", "# Compare top 3 results\n", "print(\"\\nComparison of top 3 results:\")\n", "for i in range(3):\n", " vgg16_label = vgg16_decoded[i][0]\n", " vgg16_class_id = vgg16_decoded[i][1]\n", " resnet50_label = resnet50_decoded[i][0]\n", " resnet50_class_id = resnet50_decoded[i][1]\n", " mobilenetv2_label = mobilenetv2_decoded[i][0]\n", " mobilenetv2_class_id = mobilenetv2_decoded[i][1]\n", " print(f\"Rank {i+1}: VGG16={vgg16_label} ({vgg16_class_id}), ResNet50={resnet50_label} ({resnet50_class_id}), MobileNetV2={mobilenetv2_label} ({mobilenetv2_class_id})\")" ] } ], "metadata": { "kernelspec": { "display_name": "cv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }