#Machine Learning based scratches on printed paper detection, in high-speed printing systems

>**Universitat Oberta de Catalunya**
>
>**Màster Universitari en Enginyeria Informàtica**
>
>**Treball Final de Màster - Intel·ligència Artificial**
>
>
>**Professor responsable de l’assignatura:** Carles Ventura Royo
>
>**Consultor:** Antonio Burguera Burguera
>
>**Alumne:** Jordi Falcés i Valls
>
>
>**Idioma:** Anglès
>
>
>Desembre de 2021



#Environment

In [None]:
import tensorflow as tf
print(tf.test.gpu_device_name())

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

!cat /proc/meminfo

import subprocess
print((subprocess.check_output("lscpu", shell = True).strip()).decode())

import platform
print(platform.platform())

!nvidia-smi

#Creation of the DataSets

##Prepare the environment to create the dataset

In [None]:
# Prepare the environment to create the datasets

!rm -rf datasets

# For the dataset with scratches

!mkdir datasets
!mkdir datasets/scratches
!mkdir datasets/scratches/defects
!mkdir datasets/scratches/defects/tiles64
!mkdir datasets/scratches/defects/tiles64/color
!mkdir datasets/scratches/defects/tiles64/grayscale
!mkdir datasets/scratches/defects/tiles320
!mkdir datasets/scratches/defects/tiles320/color
!mkdir datasets/scratches/defects/tiles320/grayscale

!cp drive/MyDrive/Transfer/UOC/TFM/datasets/scratches/defects/*.bmp datasets/scratches/defects/.

In [None]:
# Shows the images of the original set of BMP images that contain scratches,
# obtained from the Vision System of an HP PageWide Web Press T250HD

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects"
filetype = "*.bmp"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# First images of the folder on a 4x4 grid; the slice keeps the loop safe when
# fewer than 16 captures are available
for n, image_path in enumerate(images[:16], start = 1):
  plt.subplot(4, 4, n)
  plt.axis('off')
  plt.imshow(imread(image_path))

plt.show()

##Generates dataset from original captured images

In [None]:
# Generates dataset from original captured images
# (imports and base paths used by the tiling and grayscale conversion cells)

import glob

import cv2
import math

from PIL import Image

# Root folder of the datasets; the trailing slash matters because the cells
# that use it build their paths by plain string concatenation
datasetspath = '/content/datasets/'
# Sub-folder of the scratches dataset, appended to datasetspath
datasetfolder = 'scratches/'

##Generates 64x64px tiles from all available BMP images containing defects

In [None]:
# Generates 64x64px tiles from all available BMP images containing defects
# 8s - 26266 tiles

import os

tile_size = (64, 64)  # (width, height) of a tile, in pixels
offset = (64, 64)     # (horizontal, vertical) step between consecutive tiles
counter = 0

source_folder = datasetspath + datasetfolder + 'defects/'
tiles_folder = source_folder + 'tiles64/color/'

for image in glob.iglob(source_folder + '*.bmp'):
  img = cv2.imread(image)
  if img is None:
    # cv2.imread() signals an unreadable file with None instead of raising
    print('Skipped unreadable image: ' + image)
    continue
  img_shape = img.shape

  # os.path.basename() keeps the complete file name. str.lstrip() is not
  # suitable here: it removes every leading character contained in its
  # argument (a character set, not a prefix), which truncated names starting
  # with letters of the folder path and could make different captures collide
  image_name = os.path.basename(image)

  rows = int(math.ceil(img_shape[0] / (offset[1] * 1.0)))
  columns = int(math.ceil(img_shape[1] / (offset[0] * 1.0)))

  for i in range(rows):
    top = offset[1] * i
    bottom = min(top + tile_size[1], img_shape[0])
    for j in range(columns):
      left = offset[0] * j
      right = min(left + tile_size[0], img_shape[1])
      # Tiles on the bottom and right borders are clipped to the image, so
      # they can be smaller than tile_size
      cropped_img = img[top:bottom, left:right]
      cv2.imwrite(tiles_folder + 'def' + image_name + '_' + str(i) + '_' +
                  str(j) + '.png', cropped_img)
      counter = counter + 1

print('Tiles = ' + str(counter))

In [None]:
# Shows a random subset of the Color 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/color/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

##Generates 320x320px tiles from all available BMP images containing defects

In [None]:
# Generates 320x320px tiles from all available BMP images containing defects
# 6s - 1200 tiles

import os

tile_size = (320, 320)  # (width, height) of a tile, in pixels
offset = (320, 320)     # (horizontal, vertical) step between consecutive tiles

counter = 0

source_folder = datasetspath + datasetfolder + 'defects/'
tiles_folder = source_folder + 'tiles320/color/'

for image in glob.iglob(source_folder + '*.bmp'):
  img = cv2.imread(image)
  if img is None:
    # cv2.imread() signals an unreadable file with None instead of raising
    print('Skipped unreadable image: ' + image)
    continue
  img_shape = img.shape

  # os.path.basename() keeps the complete file name. str.lstrip() is not
  # suitable here: it removes every leading character contained in its
  # argument (a character set, not a prefix), which truncated names starting
  # with letters of the folder path and could make different captures collide
  image_name = os.path.basename(image)

  rows = int(math.ceil(img_shape[0] / (offset[1] * 1.0)))
  columns = int(math.ceil(img_shape[1] / (offset[0] * 1.0)))

  for i in range(rows):
    top = offset[1] * i
    bottom = min(top + tile_size[1], img_shape[0])
    for j in range(columns):
      left = offset[0] * j
      right = min(left + tile_size[0], img_shape[1])
      # Tiles on the bottom and right borders are clipped to the image, so
      # they can be smaller than tile_size
      cropped_img = img[top:bottom, left:right]
      cv2.imwrite(tiles_folder + 'def' + image_name + '_' + str(i) + '_' +
                  str(j) + '.png', cropped_img)
      counter = counter + 1

print('Tiles = ' + str(counter))

In [None]:
# Shows a random subset of the Color 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/color/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

##Converts 64x64px Color tiles to Grayscale

In [None]:
# Converts 64x64px Color tiles to Grayscale
# 24s - 26266 images

import os

counter = 0

color_folder = datasetspath + datasetfolder + 'defects/tiles64/color/'
grayscale_folder = datasetspath + datasetfolder + 'defects/tiles64/grayscale/'

for imagecolor in glob.iglob(color_folder + '*.png'):
    # Mode 'L' is Pillow's single-channel 8-bit grayscale
    with Image.open(imagecolor) as color_tile:
        img = color_tile.convert('L')
    # The grayscale tile keeps the file name of its color tile (written with a
    # 'def' prefix by the tiling cell). The previous 'def' + str.lstrip(path)
    # removed that prefix together with any further leading characters found
    # in the folder path (lstrip works on a character set, not on a prefix),
    # so distinct tiles could end up with the same name
    img.save(grayscale_folder + os.path.basename(imagecolor))
    counter = counter + 1

print('Images = ' + str(counter))

In [None]:
# Shows a random subset of the Grayscale 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/grayscale/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

##Converts 320x320px Color tiles to Grayscale

In [None]:
# Converts 320x320px Color tiles to Grayscale
# 21s - 1200 images

import os

counter = 0

color_folder = datasetspath + datasetfolder + 'defects/tiles320/color/'
grayscale_folder = datasetspath + datasetfolder + 'defects/tiles320/grayscale/'

for imagecolor in glob.iglob(color_folder + '*.png'):
    # Mode 'L' is Pillow's single-channel 8-bit grayscale
    with Image.open(imagecolor) as color_tile:
        img = color_tile.convert('L')
    # The grayscale tile keeps the file name of its color tile (written with a
    # 'def' prefix by the tiling cell). The previous 'def' + str.lstrip(path)
    # removed that prefix together with any further leading characters found
    # in the folder path (lstrip works on a character set, not on a prefix),
    # so distinct tiles could end up with the same name
    img.save(grayscale_folder + os.path.basename(imagecolor))
    counter = counter + 1

print('Images = ' + str(counter))

In [None]:
# Shows a random subset of the Grayscale 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/grayscale/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

##Zips the datasets folder

In [None]:
# Zips the datasets folder
# 140s

# -r recurses into the sub-folders; both paths are relative, so the archive is
# written to (and the folder read from) the current working directory
!zip -r datasets.zip datasets

##Copies the datasets.zip file to Google Drive

In [None]:
# Copies the datasets.zip file to Google Drive
# 8s

# Relative paths: both datasets.zip and the drive/ folder are looked up in the
# current working directory
!cp datasets.zip drive/MyDrive/Transfer/UOC/TFM/.

##Manual sorting of the generated dataset

At this point, the generated dataset needs to be sorted by human beings.
The grayscale and color tiles of 64x64px and 320x320px, from original captures known to contain scratches, have been sorted as having scratches (scratched) or not having scratches (notscratched).

Tiles smaller than the nominal tile size (64x64px or 320x320px, respectively) have been removed.

##Downloads the sorted dataset to the local environment

In [None]:
# Downloads the sorted dataset to the local environment
# 60s

!mkdir datasets
!mkdir datasets/scratches

!cp drive/MyDrive/Transfer/UOC/TFM/datasets/scratches/defects.zip datasets/scratches/.

!unzip datasets/scratches/defects.zip -d datasets/scratches/

In [None]:
# Shows a random subset of the Scratched Grayscale 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/grayscale/scratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Not-Scratched Grayscale 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/grayscale/notscratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Scratched Color 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/color/scratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Not-Scratched Color 64x64px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles64/color/notscratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Scratched Grayscale 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/grayscale/scratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Not-Scratched Grayscale 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/grayscale/notscratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg), cmap = 'gray')
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Scratched Color 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/color/scratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

In [None]:
# Shows a random subset of the Not-Scratched Color 320x320px tiles

import random
import glob, os
from PIL import Image
from skimage.io import imread

import matplotlib.pyplot as plt

folder = "/content/datasets/scratches/defects/tiles320/color/notscratched/"
filetype = "*.png"

# Absolute glob pattern instead of os.chdir(): the working directory stays
# unchanged, so the relative paths used by other cells keep resolving correctly
images = glob.glob(os.path.join(folder, filetype))

plt.figure(1, figsize = (12, 10))
plt.axis('off')

# random.choice() raises IndexError on an empty list, so the 4x4 grid is only
# drawn when there is at least one tile to sample (with replacement)
if images:
  for n in range(1, 17):
    randomimg = random.choice(images)
    plt.subplot(4, 4, n)
    plt.axis('off')
    plt.imshow(imread(randomimg))
else:
  print('No ' + filetype + ' files found in ' + folder)

plt.show()

##Creates balanced datasets

In [None]:
# Creates a balanced 64x64px grayscale dataset - Step 1
# 1s

!rm -rf datasets/scratches/defects/tiles64/grayscale/balanced

!mkdir datasets/scratches/defects/tiles64/grayscale/balanced
!mkdir datasets/scratches/defects/tiles64/grayscale/balanced/scratched
!mkdir datasets/scratches/defects/tiles64/grayscale/balanced/notscratched

print('Files in datasets/scratches/defects/tiles64/grayscale/scratched/:')
!ls datasets/scratches/defects/tiles64/grayscale/scratched | wc -l
print('Files in datasets/scratches/defects/tiles64/grayscale/notscratched/:')
!ls datasets/scratches/defects/tiles64/grayscale/notscratched | wc -l

!cp datasets/scratches/defects/tiles64/grayscale/scratched/*.png datasets/scratches/defects/tiles64/grayscale/balanced/scratched/.

In [None]:
# Creates a balanced 64x64px grayscale dataset - Step 2
# 1s

import os
import shutil
import random

files = [file for file in
         os.listdir('datasets/scratches/defects/tiles64/grayscale/notscratched/')
         if os.path.isfile(os.path.join('datasets/scratches/defects/tiles64/grayscale/notscratched/',
                                        file))]

random_amount = 1222

for x in range(random_amount):
  selection = random.randint(0, len(files)-1)
  file = files.pop(selection)
  shutil.copyfile(os.path.join('datasets/scratches/defects/tiles64/grayscale/notscratched',
                               file), os.path.join('datasets/scratches/defects/tiles64/grayscale/balanced/notscratched',
                                                   file))

print('Files in datasets/scratches/defects/tiles64/grayscale/balanced/scratched/:')
!ls datasets/scratches/defects/tiles64/grayscale/balanced/scratched | wc -l
print('Files in datasets/scratches/defects/tiles64/grayscale/balanced/notscratched/:')
!ls datasets/scratches/defects/tiles64/grayscale/balanced/notscratched | wc -l

In [None]:
# Creates a balanced 320x320px grayscale dataset - Step 1
# 1s

!rm -rf datasets/scratches/defects/tiles320/grayscale/balanced

!mkdir datasets/scratches/defects/tiles320/grayscale/balanced
!mkdir datasets/scratches/defects/tiles320/grayscale/balanced/scratched
!mkdir datasets/scratches/defects/tiles320/grayscale/balanced/notscratched

print('Files in datasets/scratches/defects/tiles320/grayscale/scratched/:')
!ls datasets/scratches/defects/tiles320/grayscale/scratched | wc -l
print('Files in datasets/scratches/defects/tiles320/grayscale/notscratched/:')
!ls datasets/scratches/defects/tiles320/grayscale/notscratched | wc -l

!cp datasets/scratches/defects/tiles320/grayscale/scratched/*.png datasets/scratches/defects/tiles320/grayscale/balanced/scratched/.


In [None]:
# Creates a balanced 320x320px grayscale dataset - Step 2
# 1s

import os
import shutil
import random

files = [file for file in 
         os.listdir('datasets/scratches/defects/tiles320/grayscale/notscratched/') 
         if os.path.isfile(os.path.join('datasets/scratches/defects/tiles320/grayscale/notscratched/', 
                                        file))]

random_amount = 114

for x in range(random_amount):
  selection = random.randint(0, len(files)-1)
  file = files.pop(selection)
  shutil.copyfile(os.path.join('datasets/scratches/defects/tiles320/grayscale/notscratched', 
                               file), os.path.join('datasets/scratches/defects/tiles320/grayscale/balanced/notscratched', 
                                                   file))

print('Files in datasets/scratches/defects/tiles320/grayscale/balanced/scratched/:')
!ls datasets/scratches/defects/tiles320/grayscale/balanced/scratched | wc -l
print('Files in datasets/scratches/defects/tiles320/grayscale/balanced/notscratched/:')
!ls datasets/scratches/defects/tiles320/grayscale/balanced/notscratched | wc -l


In [None]:
# Creates a balanced 64x6px color dataset - Step 1
# 1s

!rm -rf datasets/scratches/defects/tiles64/color/balanced

!mkdir datasets/scratches/defects/tiles64/color/balanced
!mkdir datasets/scratches/defects/tiles64/color/balanced/scratched
!mkdir datasets/scratches/defects/tiles64/color/balanced/notscratched

print('Files in datasets/scratches/defects/tiles64/color/scratched/:')
!ls datasets/scratches/defects/tiles64/color/scratched | wc -l
print('Files in datasets/scratches/defects/tiles64/color/notscratched/:')
!ls datasets/scratches/defects/tiles64/color/notscratched | wc -l

!cp datasets/scratches/defects/tiles64/color/scratched/*.png datasets/scratches/defects/tiles64/color/balanced/scratched/.


In [None]:
# Creates a balanced 64x64px color dataset - Step 2
# 1s

import os
import shutil
import random

files = [file for file in 
         os.listdir('datasets/scratches/defects/tiles64/color/notscratched/') 
         if os.path.isfile(os.path.join('datasets/scratches/defects/tiles64/color/notscratched/', 
                                        file))]

random_amount = 1222

for x in range(random_amount):
  selection = random.randint(0, len(files)-1)
  file = files.pop(selection)
  shutil.copyfile(os.path.join('datasets/scratches/defects/tiles64/color/notscratched', 
                               file), os.path.join('datasets/scratches/defects/tiles64/color/balanced/notscratched', 
                                                   file))

print('Files in datasets/scratches/defects/tiles64/color/balanced/scratched/:')
!ls datasets/scratches/defects/tiles64/color/balanced/scratched | wc -l
print('Files in datasets/scratches/defects/tiles64/color/balanced/notscratched/:')
!ls datasets/scratches/defects/tiles64/color/balanced/notscratched | wc -l


In [None]:
# Creates a balanced 320x320px color dataset - Step 1
# 1s

!rm -rf datasets/scratches/defects/tiles320/color/balanced

!mkdir datasets/scratches/defects/tiles320/color/balanced
!mkdir datasets/scratches/defects/tiles320/color/balanced/scratched
!mkdir datasets/scratches/defects/tiles320/color/balanced/notscratched

print('Files in datasets/scratches/defects/tiles320/color/scratched/:')
!ls datasets/scratches/defects/tiles320/color/scratched | wc -l
print('Files in datasets/scratches/defects/tiles320/color/notscratched/:')
!ls datasets/scratches/defects/tiles320/color/notscratched | wc -l

!cp datasets/scratches/defects/tiles320/color/scratched/*.png datasets/scratches/defects/tiles320/color/balanced/scratched/.


In [None]:
# Creates a balanced 320x320px color dataset - Step 2
# 1s

import os
import shutil
import random

files = [file for file in 
         os.listdir('datasets/scratches/defects/tiles320/color/notscratched/') 
         if os.path.isfile(os.path.join('datasets/scratches/defects/tiles320/color/notscratched/', 
                                        file))]

random_amount = 114

for x in range(random_amount):
  selection = random.randint(0, len(files)-1)
  file = files.pop(selection)
  shutil.copyfile(os.path.join('datasets/scratches/defects/tiles320/color/notscratched', 
                               file), os.path.join('datasets/scratches/defects/tiles320/color/balanced/notscratched', 
                                                   file))

print('Files in datasets/scratches/defects/tiles320/color/balanced/scratched/:')
!ls datasets/scratches/defects/tiles320/color/balanced/scratched | wc -l
print('Files in datasets/scratches/defects/tiles320/color/balanced/notscratched/:')
!ls datasets/scratches/defects/tiles320/color/balanced/notscratched | wc -l


##Prepares the datasets to be used with Augmentation (CSV)

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Balanced 64x64 grayscale
# 1s

!rm -rf datasets/scratches/defects/tiles64/grayscale/balancedall
!mkdir datasets/scratches/defects/tiles64/grayscale/balancedall

!ls datasets/scratches/defects/tiles64/grayscale/balanced/scratched > scratched.txt
!ls datasets/scratches/defects/tiles64/grayscale/balanced/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv

!cp datasets/scratches/defects/tiles64/grayscale/balanced/scratched/* datasets/scratches/defects/tiles64/grayscale/balancedall/.
!cp datasets/scratches/defects/tiles64/grayscale/balanced/notscratched/* datasets/scratches/defects/tiles64/grayscale/balancedall/.

!cat datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Imbalanced 64x64 grayscale
# 80s

!rm -rf datasets/scratches/defects/tiles64/grayscale/imbalancedall
!mkdir datasets/scratches/defects/tiles64/grayscale/imbalancedall

!ls datasets/scratches/defects/tiles64/grayscale/scratched > scratched.txt
!ls datasets/scratches/defects/tiles64/grayscale/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles64/grayscale/imbalanced_scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles64/grayscale/imbalanced_scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles64/grayscale/imbalanced_scratched_notscratched.csv

!for i in datasets/scratches/defects/tiles64/grayscale/scratched/*; do cp "$i" datasets/scratches/defects/tiles64/grayscale/imbalancedall/.; done
!for i in datasets/scratches/defects/tiles64/grayscale/notscratched/*; do cp "$i" datasets/scratches/defects/tiles64/grayscale/imbalancedall/.; done

!cat datasets/scratches/defects/tiles64/grayscale/imbalanced_scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Balanced 320x320 grayscale
# 1s

!rm -rf datasets/scratches/defects/tiles320/grayscale/balancedall
!mkdir datasets/scratches/defects/tiles320/grayscale/balancedall

!ls datasets/scratches/defects/tiles320/grayscale/balanced/scratched > scratched.txt
!ls datasets/scratches/defects/tiles320/grayscale/balanced/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv

!cp datasets/scratches/defects/tiles320/grayscale/balanced/scratched/* datasets/scratches/defects/tiles320/grayscale/balancedall/.
!cp datasets/scratches/defects/tiles320/grayscale/balanced/notscratched/* datasets/scratches/defects/tiles320/grayscale/balancedall/.

!cat datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Imbalanced 320x320 grayscale
# 1s

!rm -rf datasets/scratches/defects/tiles320/grayscale/imbalancedall
!mkdir datasets/scratches/defects/tiles320/grayscale/imbalancedall

!ls datasets/scratches/defects/tiles320/grayscale/scratched > scratched.txt
!ls datasets/scratches/defects/tiles320/grayscale/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles320/grayscale/imbalanced_scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles320/grayscale/imbalanced_scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles320/grayscale/imbalanced_scratched_notscratched.csv

!cp datasets/scratches/defects/tiles320/grayscale/scratched/* datasets/scratches/defects/tiles320/grayscale/imbalancedall/.
!cp datasets/scratches/defects/tiles320/grayscale/notscratched/* datasets/scratches/defects/tiles320/grayscale/imbalancedall/.

!cat datasets/scratches/defects/tiles320/grayscale/imbalanced_scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Balanced 64x64 color
# 1s

!rm -rf datasets/scratches/defects/tiles64/color/balancedall
!mkdir datasets/scratches/defects/tiles64/color/balancedall

!ls datasets/scratches/defects/tiles64/color/balanced/scratched > scratched.txt
!ls datasets/scratches/defects/tiles64/color/balanced/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles64/color/scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles64/color/scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles64/color/scratched_notscratched.csv

!cp datasets/scratches/defects/tiles64/color/balanced/scratched/* datasets/scratches/defects/tiles64/color/balancedall/.
!cp datasets/scratches/defects/tiles64/color/balanced/notscratched/* datasets/scratches/defects/tiles64/color/balancedall/.

!cat datasets/scratches/defects/tiles64/color/scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Imbalanced 64x64 color
# 80s

!rm -rf datasets/scratches/defects/tiles64/color/imbalancedall
!mkdir datasets/scratches/defects/tiles64/color/imbalancedall

!ls datasets/scratches/defects/tiles64/color/scratched > scratched.txt
!ls datasets/scratches/defects/tiles64/color/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles64/color/imbalanced_scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles64/color/imbalanced_scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles64/color/imbalanced_scratched_notscratched.csv

!for i in datasets/scratches/defects/tiles64/color/scratched/*; do cp "$i" datasets/scratches/defects/tiles64/color/imbalancedall/.; done
!for i in datasets/scratches/defects/tiles64/color/notscratched/*; do cp "$i" datasets/scratches/defects/tiles64/color/imbalancedall/.; done

!cat datasets/scratches/defects/tiles64/color/imbalanced_scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Balanced 320x320 color
# 1s

!rm -rf datasets/scratches/defects/tiles320/color/balancedall
!mkdir datasets/scratches/defects/tiles320/color/balancedall

!ls datasets/scratches/defects/tiles320/color/balanced/scratched > scratched.txt
!ls datasets/scratches/defects/tiles320/color/balanced/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles320/color/scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles320/color/scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles320/color/scratched_notscratched.csv

!cp datasets/scratches/defects/tiles320/color/balanced/scratched/* datasets/scratches/defects/tiles320/color/balancedall/.
!cp datasets/scratches/defects/tiles320/color/balanced/notscratched/* datasets/scratches/defects/tiles320/color/balancedall/.

!cat datasets/scratches/defects/tiles320/color/scratched_notscratched.csv

In [None]:
# Creates the Datasets and the CSV file for Scratched and NotScratched
# Imbalanced 320x320 color
# 1s

!rm -rf datasets/scratches/defects/tiles320/color/imbalancedall
!mkdir datasets/scratches/defects/tiles320/color/imbalancedall

!ls datasets/scratches/defects/tiles320/color/scratched > scratched.txt
!ls datasets/scratches/defects/tiles320/color/notscratched > notscratched.txt

!sed -i 's/.png/.png,1/g' scratched.txt
!sed -i 's/.png/.png,0/g' notscratched.txt

!echo "image_name,scratched_notscratched" > datasets/scratches/defects/tiles320/color/imbalanced_scratched_notscratched.csv
!cat scratched.txt >> datasets/scratches/defects/tiles320/color/imbalanced_scratched_notscratched.csv
!cat notscratched.txt >> datasets/scratches/defects/tiles320/color/imbalanced_scratched_notscratched.csv

!cp datasets/scratches/defects/tiles320/color/scratched/* datasets/scratches/defects/tiles320/color/imbalancedall/.
!cp datasets/scratches/defects/tiles320/color/notscratched/* datasets/scratches/defects/tiles320/color/imbalancedall/.

!cat datasets/scratches/defects/tiles64/color/imbalanced_scratched_notscratched.csv

##Zips the datasets folder (Phase 1 complete) and copies it to Google Drive

In [None]:
# Zips the datasets folder and copies it to Google Drive
# 185s
# NOTE(review): the earlier zip cell writes datasets.zip to the working
# directory, not to datasets/scratches/, so this rm may have been meant for the
# downloaded datasets/scratches/defects.zip - confirm which archive should be
# left out of the Phase 1 zip
!rm -rf datasets/scratches/datasets.zip
# -r recurses into the sub-folders of datasets
!zip -r datasetsPhase1.zip datasets

!cp datasetsPhase1.zip drive/MyDrive/Transfer/UOC/TFM/.

#Data Augmentation

##Downloads Phase 1 complete dataset to the local environment

In [None]:
# Downloads the Phase1 dataset to the local environment
# 120s

# Removes any local datasets folder before extracting the archive
!rm -rf datasets

!cp drive/MyDrive/Transfer/UOC/TFM/datasetsPhase1.zip .

# Extracts into the current working directory
!unzip datasetsPhase1.zip -d .

##Creates and defines the iterator to augment the dataset

In [None]:
# Creates and defines the iterator to augment the dataset

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation options passed to the generator
augmentation_options = {
    'horizontal_flip': True,          # Horizontal flip
    'vertical_flip': True,            # Vertical flip
    'zoom_range': 0.2,                # Zoom
    'brightness_range': [0.2, 1.2],   # Brightness
    # Options left disabled:
    #   rotation_range = 10, fill_mode = 'nearest'   (Rotation)
    #   width_shift_range = 0.2                      (Horizontal shift)
    #   height_shift_range = 0.2                     (Vertical shift)
}

datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Shows samples of the resulting dataset, after augmentation
# Balanced Grayscale 64x64px tiles
# 1s

import matplotlib.pyplot as plt

generator = datagen.flow_from_directory(
    directory = 'datasets/scratches/defects/tiles64/grayscale/balanced',
    target_size = (64, 64), # Resize to this size
    color_mode = "grayscale", # For grayscale images
    batch_size = 1, # Number of images to extract from folder for every batch
    class_mode = "binary", # Classes to predict
    seed = 2021 # To make the result reproducible
    )

plt.figure(figsize = (12, 10))

for i in range(16):
    # Built-in next() (iterator protocol) instead of the generator.next()
    # method, which is reported missing on the iterators of recent Keras
    # releases - verify against the installed version
    image, label = next(generator)
    plt.subplot(4, 4, i + 1)
    # The batch holds a single image; squeeze() drops its channel axis so that
    # imshow receives a 2D array
    plt.imshow(image[0].squeeze(), cmap='gray')
    plt.title(int(label[0]))
    plt.axis("off")

plt.show()

In [None]:
# Shows samples of the resulting dataset, after augmentation
# Balanced Color 64x64px tiles
# 1s

import matplotlib.pyplot as plt
import numpy as np

generator = datagen.flow_from_directory(
    directory = 'datasets/scratches/defects/tiles64/color/balanced',
    target_size = (64, 64), # Resize to this size
    color_mode = "rgb", # For color images
    batch_size = 1, # Number of images to extract from folder for every batch
    class_mode = "binary", # Classes to predict
    seed = 2021 # To make the result reproducible
    )

plt.figure(figsize = (12, 10))

# 4x4 grid of augmented tiles, each titled with its class index
for i in range(16):
    # Use the built-in next(): it works on every Keras version, whereas the
    # iterator's own .next() method is missing in newer releases
    image, label = next(generator)
    plt.subplot(4, 4, i + 1)
    # Cast to uint8 so imshow treats the values as 0-255 color levels
    plt.imshow((image[0]).astype(np.uint8))
    plt.title(int(label[0]))
    plt.axis("off")

plt.show()

In [None]:
# Shows samples of the resulting dataset, after augmentation
# Balanced Grayscale 320x320px tiles
# 1s

import matplotlib.pyplot as plt

generator = datagen.flow_from_directory(
    directory = 'datasets/scratches/defects/tiles320/grayscale/balanced',
    target_size = (320, 320), # Resize to this size
    color_mode = "grayscale", # For grayscale images
    batch_size = 1, # Number of images to extract from folder for every batch
    class_mode = "binary", # Classes to predict
    seed = 2021 # To make the result reproducible
    )

plt.figure(figsize = (12, 10))

# 4x4 grid of augmented tiles, each titled with its class index
for i in range(16):
    # Use the built-in next(): it works on every Keras version, whereas the
    # iterator's own .next() method is missing in newer releases
    image, label = next(generator)
    plt.subplot(4, 4, i + 1)
    # batch_size is 1, so image[0] is the only tile; squeeze drops the
    # single channel axis so imshow receives a 2-D array
    plt.imshow(image[0].squeeze(), cmap='gray')
    plt.title(int(label[0]))
    plt.axis("off")

plt.show()

In [None]:
# Shows samples of the resulting dataset, after augmentation
# Balanced Color 320x320px tiles
# 1s

import matplotlib.pyplot as plt
import numpy as np

generator = datagen.flow_from_directory(
    directory = 'datasets/scratches/defects/tiles320/color/balanced',
    target_size = (320, 320), # Resize to this size
    color_mode = "rgb", # For color images
    batch_size = 1, # Number of images to extract from folder for every batch
    class_mode = "binary", # Classes to predict
    seed = 2021 # To make the result reproducible
    )

plt.figure(figsize = (12, 10))

# 4x4 grid of augmented tiles, each titled with its class index
for i in range(16):
    # Use the built-in next(): it works on every Keras version, whereas the
    # iterator's own .next() method is missing in newer releases
    image, label = next(generator)
    plt.subplot(4, 4, i + 1)
    # Cast to uint8 so imshow treats the values as 0-255 color levels
    plt.imshow((image[0]).astype(np.uint8))
    plt.title(int(label[0]))
    plt.axis("off")

plt.show()

#Modelling

In [None]:
# Preparing to run models
# 1s

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense, Conv2D
from tensorflow.keras.layers import MaxPooling2D
import tensorflow.keras

##Balanced 320x320px Color

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles320/color/balancedall/train
!rm -rf datasets/scratches/defects/tiles320/color/balancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles320/color/balancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles320/color/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Reads every tile of the Train and Validation DataFrames into memory
# 1s

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split
train_images = df_train['image_name']
train_labels = df_train['scratched_notscratched']

valid_images = df_valid['image_name']
valid_labels = df_valid['scratched_notscratched']

# Tiles are read in color from home_path and divided by 255
x_train = np.array([cv2.imread(home_path + '/' + name).squeeze()
                    for name in train_images], dtype = "float") / 255.0
x_valid = np.array([cv2.imread(home_path + '/' + name).squeeze()
                    for name in valid_images], dtype = "float") / 255.0

# Labels converted with to_categorical to match the 2-unit softmax output
y_train = keras.utils.to_categorical(train_labels)
y_valid = keras.utils.to_categorical(valid_labels)

###Model without Augmentation

In [None]:
# Configures and runs the model WITHOUT Augmentation
# GPU 37s

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output
model_exp1 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp1.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp1.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Trains on the in-memory arrays, validating after every epoch
history_exp1 = model_exp1.fit(
    x_train, y_train,
    validation_data = (x_valid, y_valid),
    epochs = 35,
    verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp1
# 1s

import matplotlib.pyplot as plt

# List all data in history
print(history_exp1.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp1.history['accuracy'])
ax_acc.plot(history_exp1.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Balanced 320x320px Color WITHOUT Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp1.history['loss'])
ax_loss.plot(history_exp1.history['val_loss'])
ax_loss.set_title('Model loss (Balanced 320x320px Color WITHOUT Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

###Model with Augmentation

In [None]:
# Configures and runs the model WITH Augmentation
# GPU 10m

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output
model_exp2 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp2.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp2.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Both splits are streamed through the shared `datagen` augmenter.
# NOTE(review): the validation batches are randomly augmented too, so the
# validation metrics are not measured on the original tiles - confirm this
# is intended.
train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains the model
history_exp2 = model_exp2.fit(train_flow,
                              validation_data = valid_flow,
                              epochs = 100,
                              verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp2

import matplotlib.pyplot as plt

# List all data in history
print(history_exp2.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp2.history['accuracy'])
ax_acc.plot(history_exp2.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Balanced 320x320px Color WITH Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation (y axis limited to 0-4)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp2.history['loss'])
ax_loss.plot(history_exp2.history['val_loss'])
ax_loss.set_title('Model loss (Balanced 320x320px Color WITH Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
ax_loss.set_ylim(0, 4)
plt.show()

##Imbalanced 320x320px Color

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles320/color/imbalancedall/train
!rm -rf datasets/scratches/defects/tiles320/color/imbalancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles320/color/imbalancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles320/color/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Reads every tile of the Train and Validation DataFrames into memory

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split
train_images = df_train['image_name']
train_labels = df_train['scratched_notscratched']

valid_images = df_valid['image_name']
valid_labels = df_valid['scratched_notscratched']

# Tiles are read in color from home_path and divided by 255
x_train = np.array([cv2.imread(home_path + '/' + name).squeeze()
                    for name in train_images], dtype = "float") / 255.0
x_valid = np.array([cv2.imread(home_path + '/' + name).squeeze()
                    for name in valid_images], dtype = "float") / 255.0

# Labels converted with to_categorical to match the 2-unit softmax output
y_train = keras.utils.to_categorical(train_labels)
y_valid = keras.utils.to_categorical(valid_labels)

###Model without Augmentation

In [None]:
# Configures and runs the model WITHOUT Augmentation
# GPU 33s

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output
model_exp3 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp3.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp3.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Trains on the in-memory arrays, validating after every epoch
history_exp3 = model_exp3.fit(
    x_train, y_train,
    validation_data = (x_valid, y_valid),
    epochs = 35,
    verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp3

import matplotlib.pyplot as plt

# List all data in history
print(history_exp3.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp3.history['accuracy'])
ax_acc.plot(history_exp3.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Imbalanced 320x320px Color WITHOUT Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp3.history['loss'])
ax_loss.plot(history_exp3.history['val_loss'])
ax_loss.set_title('Model loss (Imbalanced 320x320px Color WITHOUT Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

###Model with Augmentation

In [None]:
# Configures and runs the model WITH Augmentation
# GPU 10m

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output
model_exp4 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 3)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp4.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp4.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Both splits are streamed through the shared `datagen` augmenter.
# NOTE(review): the validation batches are randomly augmented too, so the
# validation metrics are not measured on the original tiles - confirm this
# is intended.
train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains the model
history_exp4 = model_exp4.fit(train_flow,
                              validation_data = valid_flow,
                              epochs = 100,
                              verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp4

import matplotlib.pyplot as plt

# List all data in history
print(history_exp4.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp4.history['accuracy'])
ax_acc.plot(history_exp4.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Imbalanced 320x320px Color WITH Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation (y axis limited to 0-4)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp4.history['loss'])
ax_loss.plot(history_exp4.history['val_loss'])
ax_loss.set_title('Model loss (Imbalanced 320x320px Color WITH Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
ax_loss.set_ylim(0, 4)
plt.show()

##Balanced 320x320px Grayscale

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles320/grayscale/balancedall/train
!rm -rf datasets/scratches/defects/tiles320/grayscale/balancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles320/grayscale/balancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Loads the tiles into the Train and Validation arrays

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split
train_images = df_train.loc[:,'image_name']
train_labels = df_train.loc[:,'scratched_notscratched']

valid_images = df_valid.loc[:,'image_name']
valid_labels = df_valid.loc[:,'scratched_notscratched']

# Train images
x_train = []
for i in train_images:
    image = home_path + '/' + i
    img = cv2.imread(image, 0) # Grayscale
    x_train.append(img.squeeze())

# Train labels
y_train = keras.utils.to_categorical(train_labels)

# Validation images
x_valid = []
for i in valid_images:
    image = home_path + '/' + i
    img = cv2.imread(image, 0) # Grayscale
    x_valid.append(img.squeeze())

# Validation labels
y_valid = keras.utils.to_categorical(valid_labels)

# Normalize images
x_train = np.array(x_train, dtype = "float") / 255.0
x_valid = np.array(x_valid, dtype = "float") / 255.0

# Add the single channel axis expected by Conv2D. The sample count is left
# as -1 so it is inferred from the data instead of being hard-coded, which
# would raise as soon as the number of tiles in the CSV changes.
x_train = x_train.reshape(-1, 320, 320, 1)
x_valid = x_valid.reshape(-1, 320, 320, 1)

###Model without Augmentation

In [None]:
# Configures and runs the model WITHOUT Augmentation
# GPU 42s

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output; the input has a single (grayscale) channel
model_exp5 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 1)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp5.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp5.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Trains on the in-memory arrays, validating after every epoch
history_exp5 = model_exp5.fit(
    x_train, y_train,
    validation_data = (x_valid, y_valid),
    epochs = 35,
    verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp5

import matplotlib.pyplot as plt

# List all data in history
print(history_exp5.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp5.history['accuracy'])
ax_acc.plot(history_exp5.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Balanced 320x320px Grayscale WITHOUT Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp5.history['loss'])
ax_loss.plot(history_exp5.history['val_loss'])
ax_loss.set_title('Model loss (Balanced 320x320px Grayscale WITHOUT Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

###Model with Augmentation

In [None]:
# Configures and runs the model WITH Augmentation
# GPU 3m

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output; the input has a single (grayscale) channel
model_exp6 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 1)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp6.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp6.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Both splits are streamed through the shared `datagen` augmenter.
# NOTE(review): the validation batches are randomly augmented too, so the
# validation metrics are not measured on the original tiles - confirm this
# is intended.
train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains the model
history_exp6 = model_exp6.fit(train_flow,
                              validation_data = valid_flow,
                              epochs = 100,
                              verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp6

import matplotlib.pyplot as plt

# List all data in history
print(history_exp6.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp6.history['accuracy'])
ax_acc.plot(history_exp6.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Balanced 320x320px Grayscale WITH Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation (y axis limited to 0-4)
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp6.history['loss'])
ax_loss.plot(history_exp6.history['val_loss'])
ax_loss.set_title('Model loss (Balanced 320x320px Grayscale WITH Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
ax_loss.set_ylim(0, 4)
plt.show()

##Imbalanced 320x320px Grayscale

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles320/grayscale/imbalancedall/train
!rm -rf datasets/scratches/defects/tiles320/grayscale/imbalancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles320/grayscale/imbalancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles320/grayscale/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Loads the tiles into the Train and Validation arrays

import cv2
from tensorflow import keras
import numpy as np

# Images
train_images = df_train.loc[:,'image_name']
train_labels = df_train.loc[:,'scratched_notscratched']

valid_images = df_valid.loc[:,'image_name']
valid_labels = df_valid.loc[:,'scratched_notscratched']

# Train images
x_train = []
for i in train_images:
    image = home_path + '/' + i
    img = cv2.imread(image, 0) # Grayscale
    x_train.append(img.squeeze())

# Train labels
y_train = keras.utils.to_categorical(train_labels)

# Validation images
x_valid = []
for i in valid_images:
    image = home_path + '/' +i
    img = cv2.imread(image, 0) # Grayscale
    x_valid.append(img.squeeze())

# Validation labels
y_valid = keras.utils.to_categorical(valid_labels)

# Normalize images
x_train = np.array(x_train, dtype = "float") / 255.0
x_valid = np.array(x_valid, dtype = "float") / 255.0

x_train = x_train.reshape(182, 320, 320, 1)
x_valid = x_valid.reshape(46, 320, 320, 1)

###Model without Augmentation

In [None]:
# Configures and runs the model WITHOUT Augmentation
# GPU 42s

# Model architecture: five Conv2D + MaxPooling2D stages (32, 32, 32, 64, 64
# filters), then Flatten, a 64-unit dense layer with dropout and a 2-unit
# softmax output; the input has a single (grayscale) channel
model_exp7 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 1)),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),
    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp7.summary()

# Compiles the model (no optimizer is passed, so the Keras default applies)
model_exp7.compile(loss = 'binary_crossentropy', metrics = ['accuracy'])

# Trains on the in-memory arrays, validating after every epoch
history_exp7 = model_exp7.fit(
    x_train, y_train,
    validation_data = (x_valid, y_valid),
    epochs = 35,
    verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp7

import matplotlib.pyplot as plt

# List all data in history
print(history_exp7.history.keys())

# Accuracy per epoch, training vs validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(history_exp7.history['accuracy'])
ax_acc.plot(history_exp7.history['val_accuracy'])
ax_acc.set_title('Model accuracy (Imbalanced 320x320px Grayscale WITHOUT Augmentation)')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Loss per epoch, training vs validation
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history_exp7.history['loss'])
ax_loss.plot(history_exp7.history['val_loss'])
ax_loss.set_title('Model loss (Imbalanced 320x320px Grayscale WITHOUT Augmentation)')
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

###Model with Augmentation

In [None]:
# Experiment 8: builds and trains the CNN on the tiles WITH Augmentation
# GPU 3.5m (run time noted by the author)

# Model architecture: five Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32, 64 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp8 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (320, 320, 1)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp8.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp8.compile(metrics = ['accuracy'],
                   loss = 'binary_crossentropy')

# Batch generators for both splits, created with seed 2021 and shuffling on.
# NOTE(review): the validation arrays also go through `datagen` (defined
# outside this cell), so validation metrics are computed on whatever
# transforms it applies - confirm this is intended
exp8_train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
exp8_valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains for 100 epochs on the generated batches
history_exp8 = model_exp8.fit(exp8_train_flow,
                              validation_data = exp8_valid_flow,
                              epochs = 100,
                              verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp8

import matplotlib.pyplot as plt

# Per-epoch metrics stored by fit()
exp8_metrics = history_exp8.history

# Lists the names of the recorded metrics
print(exp8_metrics.keys())

# One figure per metric:
# (train key, validation key, title, y-axis label, legend position, y-range)
for train_key, valid_key, figure_title, y_label, legend_pos, y_range in (
        ('accuracy', 'val_accuracy',
         'Model accuracy (Imbalanced 320x320px Grayscale WITH Augmentation)',
         'Accuracy', 'upper left', None),
        ('loss', 'val_loss',
         'Model loss (Imbalanced 320x320px Grayscale WITH Augmentation)',
         'Loss', 'upper right', (0, 4))):
    # Training curve first, validation curve second (same order as the legend)
    plt.plot(exp8_metrics[train_key])
    plt.plot(exp8_metrics[valid_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc = legend_pos)
    # Only the loss figure gets a fixed y-range
    if y_range is not None:
        plt.ylim(*y_range)
    plt.show()

##Balanced 64x64px Color

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles64/color/balancedall/train
!rm -rf datasets/scratches/defects/tiles64/color/balancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles64/color/balancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles64/color/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Reads every Train and Validation tile from disk into NumPy arrays

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split
train_images = df_train['image_name']
train_labels = df_train['scratched_notscratched']

valid_images = df_valid['image_name']
valid_labels = df_valid['scratched_notscratched']

# Images: cv2.imread without a flag loads the tile in color; the stacked
# array is converted to float and divided by 255 to normalize it
x_train = np.array([cv2.imread(home_path + '/' + tile_name).squeeze()
                    for tile_name in train_images],
                   dtype = "float") / 255.0
x_valid = np.array([cv2.imread(home_path + '/' + tile_name).squeeze()
                    for tile_name in valid_images],
                   dtype = "float") / 255.0

# Labels, one-hot encoded
y_train = keras.utils.to_categorical(train_labels)
y_valid = keras.utils.to_categorical(valid_labels)

###Model without Augmentation

In [None]:
# Experiment 9: builds and trains the CNN on the tiles WITHOUT Augmentation
# GPU 33s (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp9 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 3)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp9.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp9.compile(metrics = ['accuracy'],
                   loss = 'binary_crossentropy')

# Trains for 35 epochs, evaluating on the validation arrays after each epoch
history_exp9 = model_exp9.fit(x = x_train,
                              y = y_train,
                              validation_data = (x_valid, y_valid),
                              epochs = 35,
                              verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp9

import matplotlib.pyplot as plt

# Per-epoch metrics stored by fit()
exp9_metrics = history_exp9.history

# Lists the names of the recorded metrics
print(exp9_metrics.keys())

# One figure per metric:
# (train key, validation key, title, y-axis label, legend position)
for train_key, valid_key, figure_title, y_label, legend_pos in (
        ('accuracy', 'val_accuracy',
         'Model accuracy (Balanced 64x64px Color WITHOUT Augmentation)',
         'Accuracy', 'upper left'),
        ('loss', 'val_loss',
         'Model loss (Balanced 64x64px Color WITHOUT Augmentation)',
         'Loss', 'upper right')):
    # Training curve first, validation curve second (same order as the legend)
    plt.plot(exp9_metrics[train_key])
    plt.plot(exp9_metrics[valid_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc = legend_pos)
    plt.show()

###Model with Augmentation

In [None]:
# Experiment 10: builds and trains the CNN on the tiles WITH Augmentation
# GPU 7m (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp10 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 3)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp10.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp10.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Batch generators for both splits, created with seed 2021 and shuffling on.
# NOTE(review): the validation arrays also go through `datagen` (defined
# outside this cell), so validation metrics are computed on whatever
# transforms it applies - confirm this is intended
exp10_train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
exp10_valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains for 100 epochs on the generated batches
history_exp10 = model_exp10.fit(exp10_train_flow,
                                validation_data = exp10_valid_flow,
                                epochs = 100,
                                verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp10

import matplotlib.pyplot as plt

# Per-epoch metrics stored by fit()
exp10_metrics = history_exp10.history

# Lists the names of the recorded metrics
print(exp10_metrics.keys())

# One figure per metric:
# (train key, validation key, title, y-axis label, legend position, y-range)
for train_key, valid_key, figure_title, y_label, legend_pos, y_range in (
        ('accuracy', 'val_accuracy',
         'Model accuracy (Balanced 64x64px Color WITH Augmentation)',
         'Accuracy', 'upper left', None),
        ('loss', 'val_loss',
         'Model loss (Balanced 64x64px Color WITH Augmentation)',
         'Loss', 'upper right', (0, 4))):
    # Training curve first, validation curve second (same order as the legend)
    plt.plot(exp10_metrics[train_key])
    plt.plot(exp10_metrics[valid_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc = legend_pos)
    # Only the loss figure gets a fixed y-range
    if y_range is not None:
        plt.ylim(*y_range)
    plt.show()

##Imbalanced 64x64px Color

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles64/color/imbalancedall/train
!rm -rf datasets/scratches/defects/tiles64/color/imbalancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles64/color/imbalancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles64/color/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Reads every Train and Validation tile from disk into NumPy arrays

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split
train_images = df_train['image_name']
train_labels = df_train['scratched_notscratched']

valid_images = df_valid['image_name']
valid_labels = df_valid['scratched_notscratched']

# Images: cv2.imread without a flag loads the tile in color; the stacked
# array is converted to float and divided by 255 to normalize it
x_train = np.array([cv2.imread(home_path + '/' + tile_name).squeeze()
                    for tile_name in train_images],
                   dtype = "float") / 255.0
x_valid = np.array([cv2.imread(home_path + '/' + tile_name).squeeze()
                    for tile_name in valid_images],
                   dtype = "float") / 255.0

# Labels, one-hot encoded
y_train = keras.utils.to_categorical(train_labels)
y_valid = keras.utils.to_categorical(valid_labels)

###Model without Augmentation

In [None]:
# Experiment 11: builds and trains the CNN on the tiles WITHOUT Augmentation
# GPU 42s (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp11 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 3)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp11.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp11.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Trains for 35 epochs, evaluating on the validation arrays after each epoch
history_exp11 = model_exp11.fit(x = x_train,
                                y = y_train,
                                validation_data = (x_valid, y_valid),
                                epochs = 35,
                                verbose = 1)

In [None]:
# Shows the training results of model_exp11 in a graphical way: one figure
# for accuracy and one for loss, each with the train and validation curves

import matplotlib.pyplot as plt

# List all data in history
print(history_exp11.history.keys())

# Summarize history for accuracy.
# The titles name the Imbalanced dataset: this experiment belongs to the
# "Imbalanced 64x64px Color" section of the notebook
plt.plot(history_exp11.history['accuracy'])
plt.plot(history_exp11.history['val_accuracy'])
plt.title('Model accuracy (Imbalanced 64x64px Color WITHOUT Augmentation)')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Summarize history for loss
plt.plot(history_exp11.history['loss'])
plt.plot(history_exp11.history['val_loss'])
plt.title('Model loss (Imbalanced 64x64px Color WITHOUT Augmentation)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

###Model with Augmentation

In [None]:
# Experiment 12: builds and trains the CNN on the tiles WITH Augmentation
# GPU 7m (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp12 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 3)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp12.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp12.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Batch generators for both splits, created with seed 2021 and shuffling on.
# NOTE(review): the validation arrays also go through `datagen` (defined
# outside this cell), so validation metrics are computed on whatever
# transforms it applies - confirm this is intended
exp12_train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
exp12_valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains for 200 epochs on the generated batches
history_exp12 = model_exp12.fit(exp12_train_flow,
                                validation_data = exp12_valid_flow,
                                epochs = 200,
                                verbose = 1)

In [None]:
# Shows the training results of model_exp12 in a graphical way: one figure
# for accuracy and one for loss, each with the train and validation curves

import matplotlib.pyplot as plt

# List all data in history
print(history_exp12.history.keys())

# Summarize history for accuracy.
# The titles name the Imbalanced dataset: this experiment belongs to the
# "Imbalanced 64x64px Color" section of the notebook
plt.plot(history_exp12.history['accuracy'])
plt.plot(history_exp12.history['val_accuracy'])
plt.title('Model accuracy (Imbalanced 64x64px Color WITH Augmentation)')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Summarize history for loss (y-axis fixed to the 0-4 range)
plt.plot(history_exp12.history['loss'])
plt.plot(history_exp12.history['val_loss'])
plt.title('Model loss (Imbalanced 64x64px Color WITH Augmentation)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper right')
plt.ylim(0, 4)
plt.show()

##Balanced 64x64px Grayscale

###Creates the Training and Validation sub-datasets


In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles64/grayscale/balancedall/train
!rm -rf datasets/scratches/defects/tiles64/grayscale/balancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles64/grayscale/balancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Loads the tiles into the Train and Validation arrays

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split, taken from the DataFrames built by
# the cell that creates the Training and Validation subsets
train_images = df_train.loc[:,'image_name']
train_labels = df_train.loc[:,'scratched_notscratched']

valid_images = df_valid.loc[:,'image_name']
valid_labels = df_valid.loc[:,'scratched_notscratched']

# Train images
x_train = []
for i in train_images:
    image = home_path + '/' + i
    img = cv2.imread(image, 0) # Flag 0: load as single-channel grayscale
    x_train.append(img.squeeze())

# Train labels, one-hot encoded
y_train = keras.utils.to_categorical(train_labels)

# Validation images
x_valid = []
for i in valid_images:
    image = home_path + '/' +i
    img = cv2.imread(image, 0) # Flag 0: load as single-channel grayscale
    x_valid.append(img.squeeze())

# Validation labels, one-hot encoded
y_valid = keras.utils.to_categorical(valid_labels)

# Normalize images
x_train = np.array(x_train, dtype = "float") / 255.0
x_valid = np.array(x_valid, dtype = "float") / 255.0

# Adds the trailing channel axis required by the (64, 64, 1) model input.
# -1 lets NumPy infer the number of tiles, so the cell keeps working when
# the size of the splits changes
x_train = x_train.reshape(-1, 64, 64, 1)
x_valid = x_valid.reshape(-1, 64, 64, 1)

###Model without Augmentation

In [None]:
# Experiment 13: builds and trains the CNN on the tiles WITHOUT Augmentation
# GPU 34s (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp13 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 1)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp13.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp13.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Trains for 35 epochs, evaluating on the validation arrays after each epoch
history_exp13 = model_exp13.fit(x = x_train,
                                y = y_train,
                                validation_data = (x_valid, y_valid),
                                epochs = 35,
                                verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp13

import matplotlib.pyplot as plt

# Per-epoch metrics stored by fit()
exp13_metrics = history_exp13.history

# Lists the names of the recorded metrics
print(exp13_metrics.keys())

# One figure per metric:
# (train key, validation key, title, y-axis label, legend position)
for train_key, valid_key, figure_title, y_label, legend_pos in (
        ('accuracy', 'val_accuracy',
         'Model accuracy (Balanced 64x64px Grayscale WITHOUT Augmentation)',
         'Accuracy', 'upper left'),
        ('loss', 'val_loss',
         'Model loss (Balanced 64x64px Grayscale WITHOUT Augmentation)',
         'Loss', 'upper right')):
    # Training curve first, validation curve second (same order as the legend)
    plt.plot(exp13_metrics[train_key])
    plt.plot(exp13_metrics[valid_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc = legend_pos)
    plt.show()

###Model with Augmentation

In [None]:
# Experiment 14: builds and trains the CNN on the tiles WITH Augmentation
# GPU 4m (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp14 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 1)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp14.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp14.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Batch generators for both splits, created with seed 2021 and shuffling on.
# NOTE(review): the validation arrays also go through `datagen` (defined
# outside this cell), so validation metrics are computed on whatever
# transforms it applies - confirm this is intended
exp14_train_flow = datagen.flow(x_train, y_train, seed = 2021, shuffle = True)
exp14_valid_flow = datagen.flow(x_valid, y_valid, seed = 2021, shuffle = True)

# Trains for 200 epochs on the generated batches
history_exp14 = model_exp14.fit(exp14_train_flow,
                                validation_data = exp14_valid_flow,
                                epochs = 200,
                                verbose = 1)

In [None]:
# Plots the accuracy and loss curves recorded while training model_exp14

import matplotlib.pyplot as plt

# Per-epoch metrics stored by fit()
exp14_metrics = history_exp14.history

# Lists the names of the recorded metrics
print(exp14_metrics.keys())

# One figure per metric:
# (train key, validation key, title, y-axis label, legend position, y-range)
for train_key, valid_key, figure_title, y_label, legend_pos, y_range in (
        ('accuracy', 'val_accuracy',
         'Model accuracy (Balanced 64x64px Grayscale WITH Augmentation)',
         'Accuracy', 'upper left', None),
        ('loss', 'val_loss',
         'Model loss (Balanced 64x64px Grayscale WITH Augmentation)',
         'Loss', 'upper right', (0, 4))):
    # Training curve first, validation curve second (same order as the legend)
    plt.plot(exp14_metrics[train_key])
    plt.plot(exp14_metrics[valid_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc = legend_pos)
    # Only the loss figure gets a fixed y-range
    if y_range is not None:
        plt.ylim(*y_range)
    plt.show()

##Imbalanced 64x64px Grayscale

###Creates the Training and Validation sub-datasets

In [None]:
# Creates the Training and Validation subsets of the dataset from CSV
# randomly, in folders
# No augmentation aplied yet
# 1s

!rm -rf datasets/scratches/defects/tiles64/grayscale/imbalancedall/train
!rm -rf datasets/scratches/defects/tiles64/grayscale/imbalancedall/valid

import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split

# Folder containing the tiles and CSV
home_path = r'datasets/scratches/defects/tiles64/grayscale/imbalancedall'

# Creates train and validation folders
train_path = os.path.join(home_path, 'train')
os.mkdir(train_path)
val_path = os.path.join(home_path, 'valid')
os.mkdir(val_path)

# Creates sub-folders for scratched and notscratched tiles in train and
# validation folders
scratched_train_path = os.path.join(home_path + r'/train', 'scratched')
os.mkdir(scratched_train_path)

notscratched_train_path = os.path.join(home_path + r'/train', 'notscratched')
os.mkdir(notscratched_train_path)

scratched_val_path = os.path.join(home_path + r'/valid', 'scratched')
os.mkdir(scratched_val_path)

notscratched_val_path = os.path.join(home_path + r'/valid', 'notscratched')
os.mkdir(notscratched_val_path)

# Original DataFrame with the data from the CSV
df = pd.read_csv('datasets/scratches/defects/tiles64/grayscale/scratched_notscratched.csv')

# Images and Categories
X = df.loc[:,'image_name']
y = df.loc[:,'scratched_notscratched']

# Splits data into Training and Validation
train_x, val_x, train_y, val_y = train_test_split(X, y,
                                                  test_size = 0.2,
                                                  random_state = 2021,
                                                  stratify = y)

# Train DataFrame
df_train = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_train['image_name'] = train_x
df_train['scratched_notscratched'] = train_y

# Validation DataFrame
df_valid = pd.DataFrame(columns = ['image_name', 'scratched_notscratched'])
df_valid['image_name'] = val_x
df_valid['scratched_notscratched'] = val_y

# Resets indexes
df_train.reset_index(drop = True, inplace = True)
df_valid.reset_index(drop = True, inplace = True)

# Copy train images to sub-folder
for i in range(len(df_train)):

    image = df_train.loc[i, 'image_name']

    if df_train.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_train_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_train_path)
        
# Copy validation images to sub-folder
for i in range(len(df_valid)):

    image = df_valid.loc[i,'image_name']

    if df_valid.loc[i, 'scratched_notscratched'] == 0:
        shutil.copy(home_path + r'/' + image, notscratched_val_path)
    else:
        shutil.copy(home_path + r'/' + image, scratched_val_path)

In [None]:
# Loads the tiles into the Train and Validation arrays

import cv2
from tensorflow import keras
import numpy as np

# File names and labels of each split, taken from the DataFrames built by
# the cell that creates the Training and Validation subsets
train_images = df_train.loc[:,'image_name']
train_labels = df_train.loc[:,'scratched_notscratched']

valid_images = df_valid.loc[:,'image_name']
valid_labels = df_valid.loc[:,'scratched_notscratched']

# Train images
x_train = []
for i in train_images:
    image = home_path + '/' + i
    img = cv2.imread(image, 0) # Flag 0: load as single-channel grayscale
    x_train.append(img.squeeze())

# Train labels, one-hot encoded
y_train = keras.utils.to_categorical(train_labels)

# Validation images
x_valid = []
for i in valid_images:
    image = home_path + '/' +i
    img = cv2.imread(image, 0) # Flag 0: load as single-channel grayscale
    x_valid.append(img.squeeze())

# Validation labels, one-hot encoded
y_valid = keras.utils.to_categorical(valid_labels)

# Normalize images
x_train = np.array(x_train, dtype = "float") / 255.0
x_valid = np.array(x_valid, dtype = "float") / 255.0

# Adds the trailing channel axis required by the (64, 64, 1) model input.
# -1 lets NumPy infer the number of tiles, so the cell keeps working when
# the size of the splits changes
x_train = x_train.reshape(-1, 64, 64, 1)
x_valid = x_valid.reshape(-1, 64, 64, 1)

###Model without Augmentation

In [None]:
# Experiment 15: builds and trains the CNN on the tiles WITHOUT Augmentation
# GPU 32s (run time noted by the author)

# Model architecture: four Conv2D (3x3, ReLU) + 2x2 max-pooling stages with
# 32, 32, 32 and 64 filters, then a 64-unit dense layer, dropout and a
# 2-unit softmax output
model_exp15 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 1)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Prints the layer-by-layer summary
model_exp15.summary()

# No optimizer is passed, so Keras falls back to its default one
model_exp15.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Trains for 35 epochs, evaluating on the validation arrays after each epoch
history_exp15 = model_exp15.fit(x = x_train,
                                y = y_train,
                                validation_data = (x_valid, y_valid),
                                epochs = 35,
                                verbose = 1)

In [None]:
# Shows the training results of model_exp15 in a graphical way: one figure
# for accuracy and one for loss, each with the train and validation curves

import matplotlib.pyplot as plt

# List all data in history
print(history_exp15.history.keys())

# Summarize history for accuracy.
# The titles name the Imbalanced dataset: this experiment belongs to the
# "Imbalanced 64x64px Grayscale" section of the notebook
plt.plot(history_exp15.history['accuracy'])
plt.plot(history_exp15.history['val_accuracy'])
plt.title('Model accuracy (Imbalanced 64x64px Grayscale WITHOUT Augmentation)')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Summarize history for loss
plt.plot(history_exp15.history['loss'])
plt.plot(history_exp15.history['val_loss'])
plt.title('Model loss (Imbalanced 64x64px Grayscale WITHOUT Augmentation)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper right')
plt.show()

### Model with Augmentation

In [None]:
# Configures and runs the model WITH Augmentation
# GPU 4m

# Model architecture: four Conv2D + MaxPooling2D stages, then a dense head
# with dropout and a two-unit softmax output.
model_exp16 = Sequential([
    Conv2D(32, (3, 3), activation = 'relu', input_shape = (64, 64, 1)),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(32, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Conv2D(64, (3, 3), activation = 'relu'),
    MaxPooling2D(pool_size = (2, 2)),

    Flatten(),
    Dense(64, activation = 'relu'),
    Dropout(0.24),
    Dense(2, activation = 'softmax'),
])

# Shows model summary
model_exp16.summary()

# Compiles the model.
# No optimizer is given, so Keras falls back to its default one.
model_exp16.compile(metrics = ['accuracy'],
                    loss = 'binary_crossentropy')

# Both the training and the validation batches are drawn from `datagen`
# (defined earlier in the notebook), shuffled, with the same fixed seed.
# NOTE(review): validation batches therefore also pass through `datagen`;
# confirm that this is intended when comparing against the run on raw arrays.
train_flow_exp16 = datagen.flow(x_train, y_train,
                                shuffle = True,
                                seed = 2021)
valid_flow_exp16 = datagen.flow(x_valid, y_valid,
                                shuffle = True,
                                seed = 2021)

# Trains the model and keeps the per-epoch history for the plots further down.
history_exp16 = model_exp16.fit(
    train_flow_exp16,
    validation_data = valid_flow_exp16,
    epochs = 100,
    verbose = 1,
)

In [None]:
# Shows training results in a graphical way:
# one figure for accuracy and one for loss, each with the training and
# validation curve per epoch.

import matplotlib.pyplot as plt

# List all data in history
print(history_exp16.history.keys())

# NOTE: the comparison grid at the end of the notebook labels exp16 as
# "IB-64-GS-A" (B-64-GS-A is exp14), so the titles say "Imbalanced".

# Summarize history for accuracy
plt.plot(history_exp16.history['accuracy'])
plt.plot(history_exp16.history['val_accuracy'])
plt.title('Model accuracy (Imbalanced 64x64px Grayscale WITH Augmentation)')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper left')
plt.show()

# Summarize history for loss
plt.plot(history_exp16.history['loss'])
plt.plot(history_exp16.history['val_loss'])
plt.title('Model loss (Imbalanced 64x64px Grayscale WITH Augmentation)')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc = 'upper right')
# Fixed loss range, the same one used for the loss panels of the comparison grid
plt.ylim(0, 4)
plt.show()

## Comparing model results

In [None]:
# Side-by-side comparison of all 16 experiments on an 8x4 grid.
# Each experiment takes two neighbouring panels (accuracy, then loss), so
# every grid row holds two experiments.
fig, axs = plt.subplots(8, 4)
fig.set_figheight(20)
fig.set_figwidth(15)
fig.tight_layout(pad = 4.0)

# (training history, short label) in display order.
# Label scheme, presumably: B/IB = balanced/imbalanced dataset,
# 320/64 = tile size in px, C/GS = color/grayscale,
# NA/A = without/with augmentation.
comparison_runs = [
    (history_exp1,  'B-320-C-NA'),
    (history_exp2,  'B-320-C-A'),
    (history_exp3,  'IB-320-C-NA'),
    (history_exp4,  'IB-320-C-A'),
    (history_exp5,  'B-320-GS-NA'),
    (history_exp6,  'B-320-GS-A'),
    (history_exp7,  'IB-320-GS-NA'),
    (history_exp8,  'IB-320-GS-A'),
    (history_exp9,  'B-64-C-NA'),
    (history_exp10, 'B-64-C-A'),
    (history_exp11, 'IB-64-C-NA'),
    (history_exp12, 'IB-64-C-A'),
    (history_exp13, 'B-64-GS-NA'),
    (history_exp14, 'B-64-GS-A'),
    (history_exp15, 'IB-64-GS-NA'),
    (history_exp16, 'IB-64-GS-A'),
]

for position, (run, tag) in enumerate(comparison_runs):
    # Two experiments per row: the first uses columns 0-1, the second 2-3.
    grid_row, pair = divmod(position, 2)
    acc_ax = axs[grid_row, 2 * pair]
    loss_ax = axs[grid_row, 2 * pair + 1]

    # Accuracy panel: training and validation curves on a shared fixed range
    acc_ax.plot(run.history['accuracy'])
    acc_ax.plot(run.history['val_accuracy'])
    acc_ax.set_title('Acc. ' + tag)
    acc_ax.set(ylim = (0.4, 0.8))

    # Loss panel: training and validation curves on a shared fixed range
    loss_ax.plot(run.history['loss'])
    loss_ax.plot(run.history['val_loss'])
    loss_ax.set_title('Loss ' + tag)
    loss_ax.set(ylim = (0, 4))

for ax in axs.flat:
    ax.set(xlabel = 'Epoch')

plt.show()