# Tuesday, January 23, 2024
#
# ML: Image segmentation with a U-Net-like architecture

# (Line-number gutter 1-217 from the original code listing omitted.)
# https://keras.io/examples/vision/oxford_pets_image_segmentation/
import os

# Dataset locations and basic hyper-parameters used by the rest of the script.
input_dir = "images/"
target_dir = "annotations/trimaps/"
img_size = (160, 160)
num_classes = 3
batch_size = 32

# Collect the .jpg inputs; sorting makes the order deterministic.
input_img_paths = sorted(
    os.path.join(input_dir, fname)
    for fname in os.listdir(input_dir)
    if fname.endswith(".jpg")
)

# Collect the .png masks, skipping hidden files (names starting with a dot).
# Inputs and masks are paired purely by their position in the sorted lists.
target_img_paths = sorted(
    os.path.join(target_dir, fname)
    for fname in os.listdir(target_dir)
    if not fname.startswith(".") and fname.endswith(".png")
)

print("Number of samples:", len(input_img_paths))

# Print the first ten (input, mask) pairs as a quick sanity check of the pairing.
for input_path, target_path in zip(input_img_paths[:10], target_img_paths[:10]):
    print(f"{input_path} | {target_path}")


from IPython.display import Image, display
from keras.utils import load_img
from PIL import ImageOps
import matplotlib.pyplot as plt

# Show the 10th input image (list index 9)
display(Image(filename=input_img_paths[9]))

# Show the matching target mask; auto-contrast stretches the per-pixel
# category values so that they become visible
target_preview = load_img(target_img_paths[9])
img = ImageOps.autocontrast(target_preview)
display(img)


import keras
import numpy as np
from tensorflow import data as tf_data
from tensorflow import image as tf_image
from tensorflow import io as tf_io

def get_dataset(
    batch_size,
    img_size,
    input_img_paths,
    target_img_paths,
    max_dataset_len=None,
):
    """Build a batched ``tf.data`` pipeline of (input image, target mask) pairs.

    Args:
        batch_size: Number of samples per batch.
        img_size: Target size handed to ``tf.image.resize`` for both the
            input image and the mask.
        input_img_paths: Paths of the input images.
        target_img_paths: Paths of the mask images, paired index-by-index
            with ``input_img_paths``.
        max_dataset_len: When truthy, only the first ``max_dataset_len``
            entries of both path lists are used (handy for quick debugging).
            ``None`` or ``0`` keeps everything.

    Returns:
        The dataset of ``(input_img, target_img)`` pairs, batched with
        ``batch_size``.
    """

    def _read_resized(path, channels, **resize_options):
        # Read the raw file bytes, decode them and resize to ``img_size``.
        # NOTE(review): the input files are selected by a ".jpg" suffix but
        # are decoded with decode_png; per the TensorFlow docs this op also
        # accepts JPEG data - confirm for the TF version in use.
        raw_bytes = tf_io.read_file(path)
        pixels = tf_io.decode_png(raw_bytes, channels=channels)
        return tf_image.resize(pixels, img_size, **resize_options)

    def load_img_masks(input_img_path, target_img_path):
        image = _read_resized(input_img_path, channels=3)
        image = tf_image.convert_image_dtype(image, "float32")

        # Nearest-neighbour resizing avoids blending neighbouring label values.
        mask = _read_resized(target_img_path, channels=1, method="nearest")
        mask = tf_image.convert_image_dtype(mask, "uint8")

        # Ground truth labels are 1, 2, 3; shift them to 0, 1, 2.
        return image, mask - 1

    # Optionally truncate both lists in lockstep for faster debugging runs.
    if max_dataset_len:
        input_img_paths, target_img_paths = (
            input_img_paths[:max_dataset_len],
            target_img_paths[:max_dataset_len],
        )

    path_pairs = tf_data.Dataset.from_tensor_slices(
        (input_img_paths, target_img_paths)
    )
    return path_pairs.map(
        load_img_masks, num_parallel_calls=tf_data.AUTOTUNE
    ).batch(batch_size)

from keras import layers

def get_model(img_size, num_classes):
    """Assemble the U-Net-like segmentation network.

    Args:
        img_size: Tuple with the spatial size of the input; ``(3,)`` is
            appended to it to form the model's input shape.
        num_classes: Number of filters in the final softmax convolution,
            i.e. the number of per-pixel classes.

    Returns:
        A ``keras.Model`` mapping the input tensor to per-pixel softmax
        outputs.
    """

    def relu_conv_bn(tensor, conv_layer, filters):
        # One "ReLU -> 3x3 convolution -> batch norm" stage; ``conv_layer``
        # selects the convolution flavour (separable or transposed).
        tensor = layers.Activation("relu")(tensor)
        tensor = conv_layer(filters, 3, padding="same")(tensor)
        return layers.BatchNormalization()(tensor)

    inputs = keras.Input(shape=img_size + (3,))

    # Entry block: strided convolution followed by batch norm and ReLU.
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    # Tensor feeding the residual branch of the next block.
    skip = x

    # Downsampling half: three blocks that differ only in feature depth.
    for filters in (64, 128, 256):
        for _ in range(2):
            x = relu_conv_bn(x, layers.SeparableConv2D, filters)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project the residual with a strided 1x1 convolution so it can be
        # added to the pooled main branch.
        shortcut = layers.Conv2D(filters, 1, strides=2, padding="same")(skip)
        x = layers.add([x, shortcut])
        skip = x

    # Upsampling half: four blocks with decreasing feature depth.
    for filters in (256, 128, 64, 32):
        for _ in range(2):
            x = relu_conv_bn(x, layers.Conv2DTranspose, filters)
        x = layers.UpSampling2D(2)(x)

        # Upsample the residual and project it to ``filters`` channels before
        # adding it to the main branch.
        shortcut = layers.UpSampling2D(2)(skip)
        shortcut = layers.Conv2D(filters, 1, padding="same")(shortcut)
        x = layers.add([x, shortcut])
        skip = x

    # Per-pixel classification layer.
    outputs = layers.Conv2D(
        num_classes, 3, activation="softmax", padding="same"
    )(x)

    return keras.Model(inputs, outputs)

# Instantiate the segmentation network and print a layer-by-layer overview
model = get_model(img_size=img_size, num_classes=num_classes)
# Alternative kept from the original listing (ComputeSumModel is not defined here):
# model=ComputeSumModel(get_model(img_size, num_classes))
model.summary()

import random

# Hold out the last `val_samples` shuffled samples as the validation set
val_samples = 1000

# Shuffle both lists in place with identically seeded generators, so inputs
# and masks receive the same permutation (assuming both lists have the same
# length) and therefore stay paired.
for paths in (input_img_paths, target_img_paths):
    random.Random(1337).shuffle(paths)

train_input_img_paths, val_input_img_paths = (
    input_img_paths[:-val_samples],
    input_img_paths[-val_samples:],
)
train_target_img_paths, val_target_img_paths = (
    target_img_paths[:-val_samples],
    target_img_paths[-val_samples:],
)

# Build one dataset per split.
# `max_dataset_len` caps the number of training files to shorten an epoch;
# drop that argument to train on the full training split.
train_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=train_input_img_paths,
    target_img_paths=train_target_img_paths,
    max_dataset_len=1000,
)
valid_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=val_input_img_paths,
    target_img_paths=val_target_img_paths,
)

# Configure training. The "sparse" categorical cross-entropy is used because
# the targets are integer class ids rather than one-hot vectors.
optimizer = keras.optimizers.Adam(1e-4)
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy")

# Checkpoint callback writing to "oxford_segmentation.keras" with
# save_best_only enabled.
checkpoint = keras.callbacks.ModelCheckpoint(
    "oxford_segmentation.keras", save_best_only=True
)
callbacks = [checkpoint]

# Train, validating at the end of every epoch
epochs = 1

model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=valid_dataset,
    callbacks=callbacks,
    verbose=2,
)

# Run the trained model over every image of the validation split
val_dataset = get_dataset(
    batch_size=batch_size,
    img_size=img_size,
    input_img_paths=val_input_img_paths,
    target_img_paths=val_target_img_paths,
)
val_preds = model.predict(val_dataset)


def display_img(i):
    """Display the predicted mask for entry ``i`` of ``val_preds``.

    The class with the highest score along the last axis is chosen for every
    position, a trailing axis of size one is appended, and the resulting
    image is auto-contrasted before being displayed.
    """
    class_ids = np.argmax(val_preds[i], axis=-1)
    mask_img = keras.utils.array_to_img(class_ids[..., np.newaxis])
    display(ImageOps.autocontrast(mask_img))



# Show input, ground truth and prediction for the validation sample at index 10
i = 10

# Input image
display(Image(filename=val_input_img_paths[i]))

# Ground-truth target mask, auto-contrasted so the class values are visible
target_mask = load_img(val_target_img_paths[i])
img = ImageOps.autocontrast(target_mask)
display(img)

# Mask predicted by the model
display_img(i)

# No comments:
#
# Post a Comment