|
| 1 | +# Image processing via convolution |
| 2 | +# Importing the needed libraries |
| 3 | +import numpy as np |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +from scipy.misc import imread, imresize |
| 6 | + |
# Reading images
# NOTE: 'imread' is taken from 'scipy.misc', where it has been deprecated
# since SciPy 1.0 and is missing from later releases - an older SciPy
# is needed to run this script as written
cat, dog = imread('images/cat.jpg'), imread('images/dog.jpg')


def _crop_center_square(image):
    """
    Returning centered square part of an image array

    'image' has height on axis 0 and width on axis 1,
    any further axes (e.g. channels) are kept untouched.
    The longer of the two sides is trimmed from both ends;
    when the excess is odd, the extra pixel is dropped from the far end.
    """
    height, width = image.shape[:2]
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    return image[top:top + side, left:left + side]


# Defining difference between width and height
print(cat.shape)  # (1080, 1920, 3)
print(dog.shape)  # (1050, 1680, 3)
difference_cat = cat.shape[1] - cat.shape[0]
difference_dog = dog.shape[1] - dog.shape[0]
# Cropping images to make them square and taking the middle part
# Explicit start and stop offsets are used instead of 'a:-a' slicing,
# because 'a:-a' gives an empty image for square input (0:-0),
# a non-square result for odd difference
# and fails for portrait input (negative difference)
cat_cropped = _crop_center_square(cat)
dog_cropped = _crop_center_square(dog)
print(cat_cropped.shape)  # (1080, 1080, 3)
print(dog_cropped.shape)  # (1050, 1050, 3)
| 21 | + |
# Defining needed image size for resizing
image_size = 200
# Defining output array for resized images
# For 2 images with height = width = image_size and 3 channels
# (channels come at the end in order to show resized image)
image_resized = np.zeros((2, image_size, image_size, 3))
print(image_resized.shape)  # (2, 200, 200, 3)
# Resizing two images - every image is resized only once
image_resized[0, :, :, :] = imresize(cat_cropped, (image_size, image_size))  # (200, 200, 3)
image_resized[1, :, :, :] = imresize(dog_cropped, (image_size, image_size))  # (200, 200, 3)

# Preparing data for convolution operation
# Defining output array for 2 images with 3 channels and height = width = image_size
x = np.zeros((2, 3, image_size, image_size))
# Reusing already resized images instead of resizing them for the second time
# And transposing in order to put channels first: (N, H, W, C) -> (N, C, H, W)
x[:, :, :, :] = image_resized.transpose((0, 3, 1, 2))
print(x[0].shape)  # (3, 200, 200)
print(x[1].shape)  # (3, 200, 200)
| 43 | + |
# Preparing weights for convolution: 2 filters, each with 3 channels and size 3x3
# Everything starts as zero, only needed values are filled in below
w = np.zeros((2, 3, 3, 3))

# First filter converts images into grayscale
# Only the central value of every 3x3 channel kernel is non-zero,
# so every output pixel is a weighted sum of red, green and blue values
# of the same input pixel (weights 0.3, 0.6 and 0.1 respectively)
w[0, :, 1, 1] = [0.3, 0.6, 0.1]

# Second filter detects horizontal edges and looks at the blue channel only
# (red and green channel kernels of this filter stay zero)
w[1, 2] = [[1, 2, 1],
           [0, 0, 0],
           [-1, -2, -1]]

# Defining biases - one per filter
# Nothing is added for grayscale filter, and 128 is added
# for edge detection filter in order to make output non-negative
b = np.array([0, 128])
| 60 | + |
| 61 | + |
def cnn_forward_naive(x, w, b, cnn_params):
    """
    Naive forward pass for convolutional layer

    Input consists of following:
    x of shape (N, C, H, W) - N data, each with C channels, height H and width W.
    w of shape (F, C, HH, WW) - We convolve each input with F different filters,
    where each filter spans all C channels; each filter has height HH and width WW.
    b - biases, where b[f] is added to every value of feature map number f.

    'cnn_params' is a dictionary with following keys:
    'stride' - step for sliding
    'pad' - zero-pad frame around input

    Function returns volume of feature maps of shape (N, F, H', W') where:
    H' = 1 + (H + 2 * pad - HH) // stride
    W' = 1 + (W + 2 * pad - WW) // stride

    N here is the same as we have it as number of input images.
    F here is as number of channels of each N (that are now as feature maps)

    ValueError is raised when filter does not fit into zero-padded input.
    """
    # Preparing parameters for convolution operation
    stride = cnn_params['stride']
    pad = cnn_params['pad']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    # Filter has to fit into padded input at least once
    if HH > H + 2 * pad or WW > W + 2 * pad:
        raise ValueError('Filter of size {}x{} does not fit into padded input of size {}x{}'
                         .format(HH, WW, H + 2 * pad, W + 2 * pad))

    # Applying to the input image volume Pad frame with zero values for all channels
    # N (number of inputs) and C (number of channels) are not padded,
    # that's why first two tuples are (0, 0), (0, 0)
    # Only H and W are padded - two last tuples are (pad, pad), (pad, pad)
    x_padded = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=0)

    # Defining spatial size of output image volume (feature maps)
    # Integer division drops positions where filter would not fit completely
    height_out = 1 + (H + 2 * pad - HH) // stride
    width_out = 1 + (W + 2 * pad - WW) // stride
    # Depth of output volume is number of filters which is F
    # And number of input images N remains the same - it is number of output image volumes now

    # Creating zero valued volume for output feature maps
    feature_maps = np.zeros((N, F, height_out, width_out))

    # Implementing convolution through N input images, each with F filters
    # For every image
    for n in range(N):
        # For every filter
        for f in range(F):
            # Going through positions of output feature map (and not of input image),
            # so that filter window never slides out of padded input,
            # whatever stride, pad and filter size are
            for height_index in range(height_out):
                # Top row of current window in padded input
                i = height_index * stride
                for width_index in range(width_out):
                    # Left column of current window in padded input
                    j = width_index * stride
                    # Convolving every channel of the window with every channel of current filter
                    # Result is summed up and offset by bias of this filter
                    feature_maps[n, f, height_index, width_index] = \
                        np.sum(x_padded[n, :, i:i + HH, j:j + WW] * w[f, :, :, :]) + b[f]

    # Returning resulted volume of feature maps
    return feature_maps
| 133 | + |
| 134 | + |
# Convolving every image with every filter and adding bias of the filter
# Parameters of convolution are: step of sliding 1 and zero-pad frame of width 1
results = cnn_forward_naive(x, w, b, dict(stride=1, pad=1))
print(results.shape)  # (2, 2, 200, 200) - two images with two feature maps each
| 138 | + |
| 139 | + |
# Creating function for normalizing resulted images
def normalize_image(img):
    """
    Linearly rescaling values of an image to the range [0, 255]

    Minimum value of 'img' is mapped to 0 and maximum value to 255.
    Floating point input keeps its type, any other input is converted to float.
    For constant image (maximum equals minimum) array of zeros is returned.
    """
    img = np.asarray(img)
    # Integer input is converted to float first - otherwise '255 * (img - min)'
    # is computed in integer type of the image and can wrap around (e.g. uint8)
    if not np.issubdtype(img.dtype, np.floating):
        img = img.astype(float)
    image_max, image_min = np.max(img), np.min(img)
    value_range = image_max - image_min
    # Constant image has no range to stretch - returning zeros
    # instead of dividing by zero and getting NaN values
    if value_range == 0:
        return np.zeros_like(img)
    return 255 * (img - image_min) / value_range
| 144 | + |
| 145 | + |
# Preparing figures for plotting
figure_1, ax = plt.subplots(nrows=2, ncols=5)
# 'ax' is a (2, 5) np array, so every subplot is addressed as ax[row, column]

# The same colormap is used for all feature maps
gray_colormap = plt.get_cmap('gray')

# Plotting original, cropped and resized images together with both feature maps
# One row of subplots per image: cat in the first row and dog in the second one
for row, (original, cropped) in enumerate(((cat, cat_cropped), (dog, dog_cropped))):
    # Shapes in the titles are taken from the arrays themselves,
    # so they always match the images that are shown
    ax[row, 0].imshow(original)
    ax[row, 0].set_title('Original {}'.format(original.shape))
    ax[row, 1].imshow(cropped)
    ax[row, 1].set_title('Cropped {}'.format(cropped.shape))
    # By adding 'astype' we convert float numbers to integer
    ax[row, 2].imshow(image_resized[row, :, :, :].astype('int'))
    ax[row, 2].set_title('Resized {}'.format(image_resized[row].shape))
    # Feature maps are rescaled to [0, 255] before showing
    ax[row, 3].imshow(normalize_image(results[row, 0]), cmap=gray_colormap)
    ax[row, 3].set_title('Grayscale')
    ax[row, 4].imshow(normalize_image(results[row, 1]), cmap=gray_colormap)
    ax[row, 4].set_title('Edges')

# Setting axes 'off'
for axis in ax.flat:
    axis.set_axis_off()

# Giving the name to the window with figure
# Title is set through the canvas manager, because 'canvas.set_window_title'
# was deprecated in Matplotlib 3.4 and is missing from later releases
figure_1.canvas.manager.set_window_title('Image convolution')
# Showing the plots
plt.show()
0 commit comments