Friday, January 19, 2024

ML: Training an image classifier from scratch on the Kaggle Cats vs Dogs dataset

# Adapted from https://keras.io/examples/vision/image_classification_from_scratch/

import os
import numpy as np
import keras
from keras import layers
from tensorflow import data as tf_data
import matplotlib.pyplot as plt

num_skipped = 0  # number of corrupted images deleted
for folder_name in ("Cat", "Dog"):
    folder_path = os.path.join(r"D:\PyTest\PetImages", folder_name)  # raw string so backslashes stay literal
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        try:
            fobj = open(fpath, "rb")  # "rb" = read in binary mode
            # Well-formed JPEGs carry a JFIF marker in their header.
            is_jfif = b"JFIF" in fobj.peek(10)
        finally:
            fobj.close()

        if not is_jfif:
            num_skipped += 1
            os.remove(fpath)

print(f"Deleted {num_skipped} images.")

image_size = (180, 180)  # (height, width) that images are resized to
batch_size = 128

train_ds, val_ds = keras.utils.image_dataset_from_directory(
    r"D:\PyTest\PetImages",
    validation_split=0.2,  # fraction of data to reserve for validation
    subset="both",  # return the training and validation datasets together
    seed=1337,  # fixed seed so the shuffle (and hence the split) is reproducible
    image_size=image_size,
    batch_size=batch_size,
)
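
# Labels come from the alphabetically sorted folder names, so 0 = Cat and
# 1 = Dog -- the cat/dog percentages printed at the end rely on this order.
print(train_ds.class_names)  # expected: ['Cat', 'Dog']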

plt.figure(figsize=(10, 10))  # show the first 9 training images

for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(np.array(images[i]).astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")

data_augmentation_layers = [
    layers.RandomFlip("horizontal"),  # random left-right flips; "vertical" and "horizontal_and_vertical" are the other modes
    layers.RandomRotation(0.1),  # random rotations of up to ±0.1 of a full circle (±36°)
]

def data_augmentation(images):
    # Apply each augmentation layer in sequence and return the augmented batch.
    for layer in data_augmentation_layers:
        images = layer(images)
    return images

plt.figure(figsize=(10, 10))  # show 9 random augmentations of one image

for images, _ in train_ds.take(1):
    for i in range(9):
        augmented_images = data_augmentation(images)
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(np.array(augmented_images[0]).astype("uint8"))
        plt.axis("off")

# Option 1 (illustration only, not used below): do augmentation and rescaling
# inside the model itself.
inputs = keras.Input(shape=image_size + (3,))  # 3 channels for RGB
x = data_augmentation(inputs)
x = layers.Rescaling(1.0 / 255)(x)  # rescale [0, 255] pixel values to [0, 1]


# Option 2 (used here): apply `data_augmentation` to the training dataset,
# so augmentation runs asynchronously on CPU while the GPU trains.
train_ds = train_ds.map(
    lambda img, label: (data_augmentation(img), label),
    num_parallel_calls=tf_data.AUTOTUNE,
)
# Prefetching overlaps data preparation with training to maximize GPU utilization.
train_ds = train_ds.prefetch(tf_data.AUTOTUNE)
val_ds = val_ds.prefetch(tf_data.AUTOTUNE)

def make_model(input_shape, num_classes):
    # A small Xception-style network: an entry block followed by a stack of
    # separable-convolution blocks with residual connections.
    inputs = keras.Input(shape=input_shape)

    # Entry block
    x = layers.Rescaling(1.0 / 255)(inputs)  # rescale [0, 255] pixel values to [0, 1]
    x = layers.Conv2D(128, 3, strides=2, padding="same")(x)  # 128 filters, 3x3 kernel
    x = layers.BatchNormalization()(x)  # normalize activations across the batch
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # set aside for the residual connection

    for size in [256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)  # depthwise separable 2D convolution
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)  # downsample by 2

        # Project the residual to the same shape, then add it back in.
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])
        previous_block_activation = x  # set aside the next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        units = 1  # binary classification needs only a single logit
    else:
        units = num_classes

    x = layers.Dropout(0.25)(x)

    # activation=None so the model returns raw logits
    outputs = layers.Dense(units, activation=None)(x)
    return keras.Model(inputs, outputs)
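
# The prediction code below calls `model.predict`, so the model must be built
# and trained first. A minimal sketch following the linked tutorial's
# hyperparameters (25 epochs, Adam at 3e-4, binary cross-entropy on logits):
model = make_model(input_shape=image_size + (3,), num_classes=2)

epochs = 25
callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.keras"),  # save a checkpoint each epoch
]
model.compile(
    optimizer=keras.optimizers.Adam(3e-4),
    loss=keras.losses.BinaryCrossentropy(from_logits=True),  # the model outputs logits
    metrics=[keras.metrics.BinaryAccuracy(name="acc")],
)
model.fit(
    train_ds,
    epochs=epochs,
    callbacks=callbacks,
    validation_data=val_ds,
)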

img = keras.utils.load_img(r"D:\PyTest\PetImages\Cat\6779.jpg", target_size=image_size)

img_array = keras.utils.img_to_array(img)
img_array = keras.ops.expand_dims(img_array, 0)  # add a batch axis

predictions = model.predict(img_array)
score = float(keras.ops.sigmoid(predictions[0][0]))  # logit -> probability of class 1 (dog)
print(f"This image is {100 * (1 - score):.2f}% cat and {100 * score:.2f}% dog.")
