14.5 ANN in PyTorch to Detect Cracks#

In this example we will apply our single-layer ANN model to detect cracks in images. Here we are flattening the images, so we will lose spatial information; we might consider a convolution kernel to preserve spatial relationships.

The data are sourced from:

Adrien Müller, Nikos Karathanasopoulos, Christian C. Roth, Dirk Mohr, Machine Learning Classifiers for Surface Crack Detection in Fracture Experiments, International Journal of Mechanical Sciences, Volume 209, (2021), 106698, ISSN 0020-7403

Workflow:

  1. Destroy existing data (the need for this is explained below)

  2. Obtain the original dataset (.zip)

  3. Use python libraries to extract the images; they are stored in MATLAB .mat file structures.

  4. Store images locally into subdirectories for processing

  5. Then apply our ANN (homebrew or PyTorch)

%reset -f

Note

In this example, we delete the processed data primarily due to GitHub file size limitations and to demonstrate a clean, reproducible workflow.

In real-world research or applications, you typically do not delete large datasets once downloaded and processed. Instead, you would:

  • Archive them to a reliable location

  • Use versioning and metadata to track provenance

  • Avoid redundant downloads to conserve bandwidth and storage

This cleanup is used here for pedagogical clarity and control, especially in constrained teaching environments.

Data Cleanup Block#

Warning

The code block below will permanently delete all downloaded and generated data, including:

  • Extracted .mat files and working directories

  • Converted .png image files

  • CSV files containing image labels

Only run this cell if:

You want to start fresh

You have already backed up your work (if needed)

You're confident all required artifacts can be re-downloaded and regenerated

This is a reasonable practice to demonstrate reproducibility and manage disk space — but treat it with care!

import shutil
import os

# Directories produced by the download/extract/convert workflow; removing
# them returns the notebook to a clean, reproducible starting state.
dirs_to_remove = [
    "extracted_dataset",  # Contains mmc1 and .mat files
    "images",             # Contains all converted PNGs
    "labels"              # CSV files mapping filenames to labels
]

for candidate in dirs_to_remove:
    # Skip (with a notice) anything that is already gone.
    if not os.path.exists(candidate):
        print(f"Directory not found (already deleted?): {candidate}")
        continue
    print(f"Deleting: {candidate}")
    shutil.rmtree(candidate)

print("Cleanup complete.")
Deleting: extracted_dataset
Deleting: images
Deleting: labels
Cleanup complete.
import os
import zipfile
import requests
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt

url = "https://ars.els-cdn.com/content/image/1-s2.0-S002074032100429X-mmc1.zip"

# Step 1: Download the ZIP archive
def download_file(url, output_path):
    """Download *url* to *output_path*, skipping the fetch if the file exists."""
    if os.path.exists(output_path):
        print(f"File already exists: {output_path}")
        return
    print(f"Downloading {url}...")
    response = requests.get(url)
    response.raise_for_status()  # fail loudly on HTTP errors
    with open(output_path, "wb") as f:
        f.write(response.content)
    print(f"Saved to {output_path}")
# Workflow paths
url = "https://ars.els-cdn.com/content/image/1-s2.0-S002074032100429X-mmc1.zip"  # same value as above; repeated so this cell is self-contained
zip_file = "dataset.zip"        # local name for the downloaded archive
extract_dir = "extracted_dataset"   # where the ZIP contents are unpacked
png_output_dir = "images"       # destination for converted PNG files
# Execute workflow
download_file(url, zip_file)
File already exists: dataset.zip
# Step 2: Extract ZIP archive
def extract_zip(zip_path, extract_to):
    """Unpack the ZIP archive at *zip_path* into the directory *extract_to*."""
    print(f"Extracting {zip_path}...")
    archive = zipfile.ZipFile(zip_path, 'r')
    with archive as zip_ref:
        zip_ref.extractall(extract_to)
    print(f"Extracted to {extract_to}")
extract_zip(zip_file, extract_dir)
Extracting dataset.zip...
Extracted to extracted_dataset
def convert_mat_to_pngs(mat_dir, output_dir):
    """Convert every .mat file in *mat_dir* into PNG image(s) under *output_dir*.

    Each .mat file is scanned for its first non-metadata variable; 3-D arrays
    are treated as stacks of grayscale images (H x W x N) and 4-D arrays as
    stacks of color images (H x W x C x N).  Any other shape is reported and
    skipped (e.g. the feature/label tables in this dataset).
    """
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(mat_dir):
        if filename.endswith(".mat"):
            full_path = os.path.join(mat_dir, filename)
            # BUG FIX: original printed the literal text "(unknown)" instead
            # of the name of the file being processed.
            print(f"Processing {filename}...")
            mat_data = loadmat(full_path)

            # Use the first variable that is not MATLAB metadata ("__header__", ...)
            for key in mat_data:
                if not key.startswith("__"):
                    images = mat_data[key]

                    # Check dimensions to decide how the stack is laid out
                    if images.ndim == 3:
                        # Grayscale stack: last axis indexes individual images
                        for i in range(images.shape[2]):
                            img = images[:, :, i]
                            img = np.squeeze(img)
                            output_filename = f"{os.path.splitext(filename)[0]}_{i:03d}.png"
                            plt.imsave(os.path.join(output_dir, output_filename), img, cmap='gray')
                    elif images.ndim == 4:
                        # Color stack: last axis indexes individual images
                        for i in range(images.shape[3]):
                            img = images[:, :, :, i]
                            output_filename = f"{os.path.splitext(filename)[0]}_{i:03d}.png"
                            plt.imsave(os.path.join(output_dir, output_filename), img)
                    else:
                        print(f"Skipping unexpected shape: {images.shape}")
                    break  # process only first candidate variable per file
    print("Done.")
convert_mat_to_pngs(os.path.join(extract_dir, "mmc1"), png_output_dir)
Processing NT_NN.mat...
Skipping unexpected shape: (1, 1)
Processing UT_NN.mat...
Skipping unexpected shape: (1, 1)
Processing NT_TestSet_Features.mat...
Skipping unexpected shape: (8, 1407)
Processing ASB_NN.mat...
Skipping unexpected shape: (1, 1)
Processing ASB_TestSet_Features.mat...
Skipping unexpected shape: (21, 3741)
Processing ASB_TestSet_Imgs.mat...
Processing NT_TestSet_Imgs.mat...
Processing UT_TestSet_Imgs.mat...
Processing UT_TestSet_Features.mat...
Skipping unexpected shape: (6, 881)
Done.

This block moves files into subdirectories

import os
import shutil

# Base source directory holding the flat set of converted PNGs
source_dir = "./images"

# Sort each specimen family's test images into its own subdirectory.
# IMPROVEMENT: the original repeated the same move logic three times
# (ASB, NT, UT); one data-driven loop does the identical work.
for prefix in ("ASB", "NT", "UT"):
    target_dir = os.path.join(source_dir, prefix)
    os.makedirs(target_dir, exist_ok=True)

    for filename in os.listdir(source_dir):
        if filename.startswith(f"{prefix}_Test"):
            src_path = os.path.join(source_dir, filename)
            dst_path = os.path.join(target_dir, filename)
            if os.path.exists(src_path):  # only move if it still exists
                shutil.move(src_path, dst_path)
import os
import pandas as pd
from scipy.io import loadmat

def _export_labels(prefix):
    """Write a CSV mapping generated PNG filenames to integer labels for one
    specimen family (*prefix* in {"ASB", "NT", "UT"}).

    Labels come from the 'YTest' variable of the family's Features .mat file.
    NOTE(review): the filenames written here ({prefix}_Image_NNN.png) do not
    match the PNGs actually produced ({prefix}_TestSet_Imgs_NNN.png); later
    cells only read the label column, so this is latent — confirm before
    relying on the filename column.
    """
    mat_path = f"extracted_dataset/mmc1/{prefix}_TestSet_Features.mat"
    output_csv = f"labels/{prefix}_labels.csv"

    mat_data = loadmat(mat_path)
    labels = mat_data['YTest'].squeeze()

    filenames = [f"{prefix}_Image_{i:03d}.png" for i in range(len(labels))]
    df = pd.DataFrame({'filename': filenames, 'label': labels.astype(int)})

    os.makedirs("labels", exist_ok=True)
    df.to_csv(output_csv, index=False)
    print(f"Saved {len(df)} labels to {output_csv}")

# IMPROVEMENT: the original repeated this block verbatim for ASB, NT, and UT.
for prefix in ("ASB", "NT", "UT"):
    _export_labels(prefix)
Saved 3741 labels to labels/ASB_labels.csv
Saved 1407 labels to labels/NT_labels.csv
Saved 881 labels to labels/UT_labels.csv
# Co-locate each label CSV with its image subdirectory for later loading
shutil.move("./labels/ASB_labels.csv", "./images/ASB/")
shutil.move("./labels/NT_labels.csv", "./images/NT/")
shutil.move("./labels/UT_labels.csv", "./images/UT/");  # trailing ; suppresses Jupyter echo

PyTorch ANN (Same dataset(s))#

The file pathnames are unique to my computer and are shown here so the notebook renders and typesets correctly.

import numpy              # useful numerical routines
import scipy.special      # special functions library
import scipy.misc         # image processing code
#import imageio           # deprecated as typical
import imageio.v2 as imageio
import matplotlib.pyplot  # import plotting routines

## Pre-processing: build training and testing sets
# Flatten each image into one CSV row of the form [truth, pixel0, pixel1, ...]
# as expected by the homebrew ANN (and the PyTorch Dataset below).
import csv

howmanyimages = 440  # a small subset for demonstration (full UT set is 881)
split = 0.1          # fraction to hold out for testing
numwritten = 0       # total rows written (train + test)

# Read the ground-truth labels (absolute pathname, machine-specific).
# IMPROVEMENT: the original never closed this file; use a context manager.
truthtable = []  # class label per image, as strings from the CSV
with open("/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/images/UT/UT_labels.csv", "r") as groundtruth:
    reader = csv.reader(groundtruth)
    for row in reader:
        truthtable.append(row[1])

# The first CSV row is the header, so shift every entry up one slot to align
# truthtable[i] with image i.  (The stale duplicate left in the final slot is
# never read because howmanyimages < len(truthtable).)
for irow in range(len(truthtable) - 1):
    truthtable[irow] = truthtable[irow + 1]

np.random.seed(42)  # reproducible train/test split
Ntest = 0
Ntrain = 0

# Training and testing CSVs, local to this directory.
# IMPROVEMENT: context managers guarantee the files are flushed and closed
# even if an image read fails mid-loop.
with open("ut-881-train.csv", "w") as outfile1, open("ut-881-test.csv", "w") as outfile2:
    writer1 = csv.writer(outfile1)  # training rows
    writer2 = csv.writer(outfile2)  # testing rows

    for i in range(howmanyimages):
        # build zero-padded image filename: 000, 001, ...
        image_name = f"/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/images/UT/UT_TestSet_Imgs_{i:03}.png"

        # read, invert, and flatten the image to a 1-D vector (16384 values)
        img_array = imageio.imread(image_name, mode='F')
        img_data = 255.0 - img_array.flatten()

        # prepend the class label so each row is [truth, pixels...]
        newimage = np.insert(img_data, 0, float(truthtable[i]))

        # randomly assign the row to the training or test file
        if np.random.uniform() <= split:
            writer2.writerow(newimage)
            Ntest += 1
        else:
            writer1.writerow(newimage)
            Ntrain += 1

        numwritten += 1

print("Total images segregated and processed:", numwritten)
print("Training images segregated and processed:", Ntrain)
print("Testing images segregated and processed:", Ntest)
print("Lost images :", numwritten - Ntrain - Ntest)
Total images segregated and processed: 440
Training images segregated and processed: 384
Testing images segregated and processed: 56
Lost images : 0
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

# Hyperparameters
INPUT_SIZE = 128 * 128          # flattened 128x128 grayscale image
HIDDEN_SIZE = INPUT_SIZE // 10  # single hidden layer, ~10% of input width
OUTPUT_SIZE = 2                 # binary classification: crack / no crack
LEARNING_RATE = 0.01            # SGD step size
EPOCHS = 100                    # full passes over the training set
BATCH_SIZE = 8                  # images per gradient step

# Define the ANN model
class SimpleANN(nn.Module):
    """Single-hidden-layer fully connected network with sigmoid activations.

    Input: flattened image vector of INPUT_SIZE; output: 2 class scores.
    NOTE(review): CrossEntropyLoss (used below) expects raw logits; the final
    sigmoid compresses them — trains, but confirm this is intentional.
    """

    def __init__(self):
        super(SimpleANN, self).__init__()
        # two dense layers: input -> hidden -> 2-class output
        self.fc1 = nn.Linear(INPUT_SIZE, HIDDEN_SIZE)
        self.fc2 = nn.Linear(HIDDEN_SIZE, OUTPUT_SIZE)

    def forward(self, x):
        hidden = torch.sigmoid(self.fc1(x))
        return torch.sigmoid(self.fc2(hidden))

# Custom Dataset
class UTDataset(Dataset):
    """Dataset over a headerless CSV whose rows are [label, pixel0, pixel1, ...]."""

    def __init__(self, csv_path):
        frame = pd.read_csv(csv_path, header=None)
        # column 0 holds the integer class label; the rest are pixel values
        self.y = frame.iloc[:, 0].values.astype(np.int64)
        self.X = frame.iloc[:, 1:].values.astype(np.float32)
        # rescale pixels from [0, 255] into [0.01, 1.0], as before
        self.X = self.X / 255.0 * 0.99 + 0.01

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return torch.from_numpy(self.X[idx]), self.y[idx]

# Load datasets
train_dataset = UTDataset("ut-881-train.csv")
test_dataset  = UTDataset("ut-881-test.csv")

# shuffle only the training data; evaluate one image at a time
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1)

# Initialize model, loss, optimizer
model = SimpleANN()
# NOTE(review): CrossEntropyLoss expects raw logits; SimpleANN ends with a
# sigmoid, which compresses the logit range — trains, but confirm intent.
criterion = nn.CrossEntropyLoss()
#criterion = nn.MSELoss()  # similar to original
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
#optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

loss_history = []  # store average loss per epoch
verbose = False    # set True for per-epoch loss printing
##### TRAINING LOOP #####
print("Starting training...")
for epoch in range(EPOCHS):
    model.train()  # training mode (no-op for plain Linear layers, but good practice)
    epoch_loss = 0.0
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # standard backprop step: clear grads, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * batch_X.size(0)  # accumulate batch loss

    # average summed loss over the full training set size
    avg_loss = epoch_loss / len(train_loader.dataset)
    loss_history.append(avg_loss)
    if verbose:
        print(f"Epoch {epoch+1}/{EPOCHS}, Avg Loss: {avg_loss:.4f}")
print("Training complete.\n")
#########################

import matplotlib.pyplot as plt

# Diagnostic plot: average training loss per epoch
plt.figure(figsize=(8, 4))
plt.plot(range(1, EPOCHS+1), loss_history, marker='o', linestyle='-', color='blue')
plt.title("Training Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Average Loss")
plt.grid(True)
plt.tight_layout()
plt.show()

# Evaluation loop
model.eval()  # inference mode
correct = 0
total = 0

verbose = False
print("Running test set predictions...")
with torch.no_grad():  # no gradients needed during evaluation
    for X_test, y_test in test_loader:
        output = model(X_test)
        # predicted class = index of the larger of the two output scores
        _, predicted = torch.max(output.data, 1)
        if verbose:
            print(f"predict = {predicted.item()}  true = {y_test.item()}  = {'correct' if predicted == y_test else 'wrong'}")
        total += 1
        correct += (predicted == y_test).sum().item()

# fraction of test images classified correctly
accuracy = correct / total
print(f"Performance = {accuracy:.4f}")
Starting training...
Training complete.
../../_images/17b370dd81b17da66801dc3c9ea3f9a2a3663ad50e5dfe51832826b4d76c906a.png
Running test set predictions...
Performance = 0.8214

Interpretation of Loss Curve#

  • Steady decline → healthy learning

  • Early plateau → learning rate too low

  • Noisy or rising → learning rate too high or data inconsistency

import imageio.v2 as imageio  # PyTorch prefers Pillow, but this keeps your original structure
import matplotlib.pyplot as plt
import numpy as np
import torch

def classify_and_display_image_pytorch(model, image_path):
    """Classify a new image using the trained PyTorch model and display it.

    Parameters: model — trained classifier expecting a flattened image vector;
    image_path — path to a grayscale PNG (assumed 128x128 to match the model —
    TODO confirm callers). Returns the predicted integer class label.
    """
    # Load grayscale image
    img_array = imageio.imread(image_path, mode='F')  # float32 output
    img_array = np.max(img_array) - img_array         # Invert colors (as done in MNIST)
    # NOTE(review): training rows used 255.0 - pixels; np.max(img_array) may
    # differ from 255, so the scaling can differ slightly — confirm.
    
    # Normalize into [0.01, 1.0] and flatten to a single row vector
    img_data = (img_array / 255.0 * 0.99) + 0.01
    input_tensor = torch.tensor(img_data.flatten(), dtype=torch.float32).unsqueeze(0)  # shape: [1, 16384] for 128x128 input (original comment said 784 — the MNIST size)
    
    # Disable gradient tracking for inference
    with torch.no_grad():
        output = model(input_tensor)
        label = torch.argmax(output).item()

    # Plot image and prediction
    plt.imshow(img_array, cmap='Greys')
    plt.title(f"Predicted Label: {label}")
    plt.axis('off')
    plt.show()
    
    return label
classify_and_display_image_pytorch(model, "/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/cat128.png")
../../_images/d03f3ae6d391fd41d3079ab0a9c833722167b97aea4ce639cadc2a367e9cb6b9.png
0
classify_and_display_image_pytorch(model, "/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/waterfall128.png")
../../_images/67a6562f283c5c3d636abb4b83f46149e01e9e6cbac4bea42e3d2e42a849c51f.png
0
classify_and_display_image_pytorch(model, "/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/concrete-cracks.png")
../../_images/65a2b3db5fa2d64ad84dbe755b6cdcc3958d82fcd5e4cfa03af2c12c463070ee.png
0

PyTorch Convolution Neural Network#

A Convolutional Neural Network (CNN) approach is much more natural for spatially structured data like the 128×128 grayscale crack/no-crack images.

Why Use a CNN Here?#

The current MLP (fully connected layers) flattens the image and ignores spatial structure.

A CNN with a 9-pixel mask (i.e., a 3×3 convolution kernel; other kernels can be used) learns spatial patterns, such as edges and textures — precisely what you’d want for detecting cracks. The 3×3 convolution filter slides across the image with padding to preserve dimensions. In CNNs, the filter weights are learned, not fixed — a huge advantage over having to specify interpolative functions (as is done in other image processing contexts).

Illustrative code (in PyTorch) is simply replacing a few parts in the code above.

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

# Hyperparameters
INPUT_SIZE = 128 * 128          # flattened image size (not referenced by the CNN; retained from the MLP cell)
HIDDEN_SIZE = INPUT_SIZE // 10  # MLP hidden width (not referenced by the CNN; retained from the MLP cell)
OUTPUT_SIZE = 2                 # number of classes (the CNN hard-codes 2 in fc2)
LEARNING_RATE = 0.01            # SGD step size
EPOCHS = 100                    # full passes over the training set
BATCH_SIZE = 8                  # images per gradient step

# Define the CNN model
class SimpleCNN(nn.Module):
    """Two-stage convolutional classifier for 128x128 single-channel images.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, halving the
    spatial size: 128x128 -> 64x64 -> 32x32.  The pooled feature maps are
    flattened and passed through two dense layers to give 2 class scores.
    """

    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, padding=1)  # 3x3 learned mask
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # halves H and W, shared by both stages
        self.conv2 = nn.Conv2d(8, 16, 3, padding=1)        # second convolution
        self.fc1 = nn.Linear(16 * 32 * 32, 64)             # dense layer on flattened maps
        self.fc2 = nn.Linear(64, 2)                        # final classification (2 classes)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # [batch, 8, 64, 64]
        x = self.pool(F.relu(self.conv2(x)))  # [batch, 16, 32, 32]
        x = torch.flatten(x, 1)               # [batch, 16*32*32]
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Custom Dataset
class UTDataset(Dataset):
    """CSV-backed dataset yielding ((1, 128, 128) image tensor, label) pairs.

    Rows are headerless [label, pixel0, pixel1, ...] as written by the
    pre-processing cell above.
    """

    def __init__(self, csv_path):
        frame = pd.read_csv(csv_path, header=None)
        # column 0 holds the integer class label; the rest are pixel values
        self.y = frame.iloc[:, 0].values.astype(np.int64)
        self.X = frame.iloc[:, 1:].values.astype(np.float32)
        # rescale pixels from [0, 255] into [0.01, 1.0], as before
        self.X = self.X / 255.0 * 0.99 + 0.01

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        # reshape the flat row to (channels=1, H=128, W=128) for Conv2d
        image = self.X[idx].reshape(1, 128, 128)
        return torch.from_numpy(image), self.y[idx]

# Load datasets
train_dataset = UTDataset("ut-881-train.csv")
test_dataset  = UTDataset("ut-881-test.csv")

# shuffle only the training data; evaluate one image at a time
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1)

# Initialize model, loss, optimizer (same driver as the MLP cell, CNN model)
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
#criterion = nn.MSELoss()  # similar to original
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
#optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_history = []  # store average loss per epoch
verbose = False    # set True for per-epoch loss printing
##### TRAINING LOOP #####
print("Starting training...")
for epoch in range(EPOCHS):
    model.train()  # training mode
    epoch_loss = 0.0
    for batch_X, batch_y in train_loader:
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)

        # standard backprop step: clear grads, backpropagate, update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * batch_X.size(0)  # accumulate batch loss

    # average summed loss over the full training set size
    avg_loss = epoch_loss / len(train_loader.dataset)
    loss_history.append(avg_loss)
    if verbose:
        print(f"Epoch {epoch+1}/{EPOCHS}, Avg Loss: {avg_loss:.4f}")
print("Training complete.\n")
#########################

import matplotlib.pyplot as plt

# Diagnostic plot: average training loss per epoch
plt.figure(figsize=(8, 4))
plt.plot(range(1, EPOCHS+1), loss_history, marker='o', linestyle='-', color='blue')
plt.title("Training Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Average Loss")
plt.grid(True)
plt.tight_layout()
plt.show()

# Evaluation loop
model.eval()  # inference mode
correct = 0
total = 0

verbose = False
print("Running test set predictions...")
with torch.no_grad():  # no gradients needed during evaluation
    for X_test, y_test in test_loader:
        output = model(X_test)
        # predicted class = index of the larger of the two output scores
        _, predicted = torch.max(output.data, 1)
        if verbose:
            print(f"predict = {predicted.item()}  true = {y_test.item()}  = {'correct' if predicted == y_test else 'wrong'}")
        total += 1
        correct += (predicted == y_test).sum().item()

# fraction of test images classified correctly
accuracy = correct / total
print(f"Performance = {accuracy:.4f}")
Starting training...
Training complete.
../../_images/a1d88a99456282801bde13057c4a22299cdc1b117836321ef11a3d97dad2c9f4.png
Running test set predictions...
Performance = 0.9464
def predict_image(img_path, model):
    """Classify a 128x128 grayscale PNG with the trained CNN and display it.

    Returns the predicted integer class label (argmax over the 2 outputs).
    """
    # v2 API for consistency with the rest of the notebook (which uses
    # imageio.v2 with mode='F' throughout).
    import imageio.v2 as imageio
    img_array = imageio.imread(img_path, mode='F')
    # BUG FIX: the original inverted the image twice (np.max(...) - img and
    # then 255.0 - flatten), which cancels the inversion.  The training rows
    # were built with a single inversion (255.0 - pixels), so match that here.
    img_data = 255.0 - img_array.flatten()
    img_data = ((img_data / 255.0) * 0.99) + 0.01  # same [0.01, 1.0] scaling as training
    img_tensor = torch.from_numpy(img_data.astype(np.float32).reshape(1, 1, 128, 128))  # shape: [1,1,128,128]
    model.eval()
    with torch.no_grad():  # inference only, no gradients
        out = model(img_tensor)
        label = torch.argmax(out).item()

    # Plot image and prediction
    plt.imshow(img_array, cmap='Greys')
    plt.title(f"Predicted Label: {label}")
    plt.axis('off')
    plt.show()
    return label
predict_image("/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/concrete-cracks.png" ,model)
../../_images/f744611041a37e15b39bd5cd4ffdab2e4fa76308a0d9431dbb918e08a66cd167.png
1
predict_image("/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/waterfall128.png" ,model)
../../_images/67a6562f283c5c3d636abb4b83f46149e01e9e6cbac4bea42e3d2e42a849c51f.png
0
predict_image("/home/sensei/ce-5319-webroot/MLBE4CE/chapters/14-neuralnetworks/cat128.png" ,model)
../../_images/b0f4995b2af1cc57703d5a534f282fe10d4b05e0a1e3e6d91927bff6848f0114.png
1

Exercise(s)#

ce5319-es4-2025-2.pdf

End of Subsection#