Road Labelling - Section 1¶

Intention¶

This is a personal project intended to let me experiment with GitHub Copilot, learn PyTorch, and have a chance to explore after a few drier take-home projects from job applications.
I want to take my set of frames and see if I can label them. As much as possible I want to automate this process of labelling without providing input labels to the system.

approach¶

I intend to do as much of the coding as possible using Copilot [in this first section all of the code was written that way, with only small tweaks]. I do not believe this is the correct approach for producing production code, and I won't do this for later sections, but I want to learn as much as possible by making mistakes and seeing what mistakes the LLM makes. Similarly, I am sure some of the things I'm trying to do with PyTorch and NNs are well-trodden ground, but I want to try to figure out my own solutions before I go looking for what other people have done, again because I want to learn what doesn't work and to have a chance to flex my creative thinking.

What is this doing?¶

I knew I wanted to work on some PyTorch programs, but one of the awkward things is coming up with a good dataset: I didn't want to spend hours collating data from somewhere online, or take a well-trodden dataset where I wouldn't feel there was any novelty. My solution was to capture an ~24 hour movie of the road outside my apartment at a rate of one frame per second (I did this before starting this section). I also didn't want to label those frames myself or with another model, so I decided to see what I could do with unlabelled data. If this sounds like laziness it probably was, but it was the kind of fun laziness that took me in an interesting direction.
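The capture script itself isn't part of this notebook. Purely as a hypothetical sketch of what it could look like (the camera index, paths and naive one-second pacing are placeholders, not the script I actually used), something like this would produce the per-second TIFF frames and the data.npy index of paths that the rest of the notebook loads:

# Hypothetical capture sketch, not the actual script: grab roughly one frame
# per second from a camera, save each as a numbered TIFF, and write a
# data.npy index of file paths in the format loaded below.
import time
import cv2
import numpy as np

cap = cv2.VideoCapture(0)               # assumed camera index
paths = []
for i in range(24 * 60 * 60):           # ~24 hours at ~1 frame per second
    ok, frame = cap.read()
    if not ok:
        break
    path = f"data/{i:06d}.tiff"
    cv2.imwrite(path, frame)
    paths.append(path)
    time.sleep(1)                       # naive pacing, good enough for a sketch
cap.release()

# Structured array with a 'data' field of file paths, matching the dtype printed later.
index = np.array([(p,) for p in paths], dtype=[('data', '<U256')])
np.save('data.npy', index)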

I've included as much of the initial investigation process as was sensible below, with thoughts/explanations in markdown where appropriate, and some final thoughts on this section at the bottom. The code, however, is entirely produced by Copilot.

Initial data preparation¶

In [ ]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import cv2

# Load every 32nd frame from the TIFF files listed in data.npy
data_arr = np.load('data.npy', allow_pickle=True)  # structured array with a 'data' field of file paths
all_paths = data_arr['data']
selected_paths = all_paths[::32]

frames = []
for fpath in selected_paths:
    frame = cv2.imread(fpath, cv2.IMREAD_GRAYSCALE)
    if frame is None:  # skip any paths cv2 fails to read
        continue
    frames.append(frame)
movie = np.stack(frames, axis=0)
print('Movie shape:', movie.shape)  # (num_frames, height, width)

# Reshape the movie: each frame is a data point, each pixel is a feature
num_frames = movie.shape[0]
num_pixels = np.prod(movie.shape[1:])
X = movie.reshape(num_frames, num_pixels)
print('Reshaped data shape:', X.shape)

data investigation¶

I wanted to see if there was indeed variation here that I might be able to explain. I also tend to think that PCA is a good baseline: if you can't provide more information than PCA, you probably haven't done much of use.

In [11]:
# Perform PCA and plot explained variance ratio for top 20 components
pca = PCA(n_components=20)
pca.fit(X)
explained_var = pca.explained_variance_ratio_ * 100  # percent

plt.figure(figsize=(8, 5))
plt.bar(range(1, 21), explained_var)
plt.xlabel('Principal Component')
plt.ylabel('% Variance Explained')
plt.title('Top 20 PCA Components: % Variance Explained')
plt.show()
[Figure: bar chart of the % variance explained by each of the top 20 principal components]
In [12]:
# Show 2 images with high and 2 with low principal component values for each of the top 5 PCs

# Project data onto principal components
X_pca = pca.transform(X)

num_pcs = 5
num_high = 2
num_low = 2

fig, axes = plt.subplots(num_pcs, num_high + num_low, figsize=(12, 2.5 * num_pcs))
if num_pcs == 1:
    axes = axes[np.newaxis, :]  # Ensure axes is 2D

for pc in range(num_pcs):
    # Get the projection values for this PC
    pc_values = X_pca[:, pc]
    # Indices of lowest and highest values
    low_idx = np.argsort(pc_values)[:num_low]
    high_idx = np.argsort(pc_values)[-num_high:][::-1]
    selected_idx = np.concatenate([low_idx, high_idx])
    for j, idx in enumerate(selected_idx):
        ax = axes[pc, j]
        ax.imshow(movie[idx], cmap='gray')
        if j < num_low:
            ax.set_title(f'PC{pc+1} Low #{j+1}')
        else:
            ax.set_title(f'PC{pc+1} High #{j-num_low+1}')
        ax.axis('off')
plt.tight_layout()
plt.show()
[Figure: for each of the top 5 PCs, the two frames with the lowest and the two with the highest projection values]

PCA results¶

These results are cooler than I expected. Even though a lot of the dimensionality is lost by doing PCA, we are still seeing some clear demarcations between different 'types' of images that can be picked out by eye from the above (night time vs. daytime, morning vs. afternoon, cars on the near side of the street vs. cars on the far side, etc.).

As I write this, it occurs to me it would be interesting to go deeper down into the more minor principal components and see how long it takes for the differences to stop being clear. However, I am writing this at the end of this section of work, so sadly I did not do that. All past me wanted to know was whether there were features/labels that could be extracted from and applied to these images, and the result above told me that yes, there were.
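One quick way to poke at that later would be to look at the component loadings themselves as images, reusing the pca and movie variables from the cells above. A sketch only, not something I ran as part of this section:

# Sketch only: view the PCA loadings as images, to probe how far down the
# component list they stay interpretable by eye. Reuses `pca` and `movie`.
h, w = movie.shape[1], movie.shape[2]

fig, axes = plt.subplots(4, 5, figsize=(15, 10))
for k, ax in enumerate(axes.ravel()):
    ax.imshow(pca.components_[k].reshape(h, w), cmap='coolwarm')
    ax.set_title(f'PC{k+1} loadings')
    ax.axis('off')
plt.tight_layout()
plt.show()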

performing learning¶

A lot of the code for the actual learning was done elsewhere, mostly because it was much more iterative trial and error, and I wanted to be building functions that I could re-use for future sections of the project.

concept¶

The concept here was that rather than labelling the data, I could feed random labels into a NN to learn, use that NN to label the data, then feed a subset of that labelled data into a new NN and use that to label the data again. By iterating in this way, I reasoned it would learn the most prominent features of the data. Once labels were produced, a user could then hand-label what each label means, but in a much faster way (i.e. I would have label1, and a glance at it might tell me it was stating whether or not there was a car in the image; now I have a labelled dataset with very little interaction). In an ideal world an approach such as this might also be able to tell me how many labels I could get out of the data. I limited myself to binary labels (i.e. car = yes/no) as I hoped it would simplify things. I see a lot of ways this could go wrong, and I'm sure this is an approach that has either been done and has a standard set of approaches, or has been shown not to work, but I believe the attempt on my own will be interesting.

the code¶

the learning is split into a couple of functions:

train_random_labels(data_npy_path, label1=None, label2=None, random_seed=42, start_offset=None, epochs=7)

This takes a list of data files and two sets of labels (if these are empty it will assign them randomly, hence the name), trains a NN classifier on every 32nd image to predict those 2 labels, and then returns the results of labelling the entire dataset.
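The real implementation lives in the project files rather than this notebook; the following is only a rough sketch of the shape described above, assuming (as I found out later) a simple fully-connected model on flattened frames. Layer sizes, full-batch training and the other details are placeholders, not the actual code:

import numpy as np
import torch
import torch.nn as nn
import cv2

def train_random_labels(data_npy_path, label1=None, label2=None,
                        random_seed=42, start_offset=None, epochs=7):
    """Sketch only -- not the real implementation in the project files."""
    torch.manual_seed(random_seed)
    rng = np.random.default_rng(random_seed)

    data_arr = np.load(data_npy_path, allow_pickle=True)
    paths = data_arr['data']
    offset = 0 if start_offset is None else start_offset
    train_paths = paths[offset::32]                      # every 32nd frame

    # Random starting labels if none are supplied (hence the name),
    # otherwise take the training subset of the labels passed in.
    n = len(train_paths)
    y1 = rng.integers(0, 2, n) if label1 is None else np.asarray(label1)[offset::32]
    y2 = rng.integers(0, 2, n) if label2 is None else np.asarray(label2)[offset::32]

    X = np.stack([cv2.imread(p, cv2.IMREAD_GRAYSCALE).ravel() / 255.0
                  for p in train_paths]).astype(np.float32)
    Xt = torch.from_numpy(X)
    yt = torch.from_numpy(np.stack([y1, y2], axis=1).astype(np.float32))

    # Tiny MLP with two independent binary outputs (one per label).
    model = nn.Sequential(nn.Linear(X.shape[1], 64), nn.ReLU(), nn.Linear(64, 2))
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.BCEWithLogitsLoss()

    for _ in range(epochs):
        opt.zero_grad()
        loss = loss_fn(model(Xt), yt)
        loss.backward()
        opt.step()

    # Label the *entire* dataset with the trained model
    # (loading every frame at once is where memory can blow up).
    X_all = np.stack([cv2.imread(p, cv2.IMREAD_GRAYSCALE).ravel() / 255.0
                      for p in paths]).astype(np.float32)
    with torch.no_grad():
        preds = (torch.sigmoid(model(torch.from_numpy(X_all))) > 0.5).int().numpy()
    return preds[:, 0], preds[:, 1]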

repeat_training(data_npy_path, num_rounds=10, epochs=7, random_seed=42)

This takes a dataset and runs train_random_labels the defined number of times, feeding the labels from each iteration into the next and storing all of the labels back into the initial data structure.
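Again only as a sketch (building on the train_random_labels sketch above, and assuming the labels are stored as extra columns in data.npy, which matches the dtype printed below):

import numpy as np
import numpy.lib.recfunctions as rfn

def repeat_training(data_npy_path, num_rounds=10, epochs=7, random_seed=42):
    """Sketch only: iterate train_random_labels, feeding labels forward."""
    data_arr = np.load(data_npy_path, allow_pickle=True)
    label1, label2 = None, None                      # round 1 starts from random labels
    for r in range(1, num_rounds + 1):
        label1, label2 = train_random_labels(
            data_npy_path, label1=label1, label2=label2,
            random_seed=random_seed, epochs=epochs)
        # Store this round's labels as new columns, e.g. 'label1_training3'.
        data_arr = rfn.append_fields(
            data_arr, [f'label1_training{r}', f'label2_training{r}'],
            [label1.astype('<i4'), label2.astype('<i4')], usemask=False)
    np.save(data_npy_path, data_arr)
    return data_arr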

some issues¶

Most of my thoughts are at the end of this file, but a few quick ones here.

  • There is a lot here that I did quickly without much thought, e.g. why every 32nd image for training? (I'm not sure that's egregiously wrong, but it was just a choice made in a rush.)
  • Similarly I don't think having it assign 2 labels totally made sense, but this was initial testing and I have much more of an idea about how to approach this now (again, see later).
  • I did very little to check the working that Copilot did on the actual NN set-up and training. I knew at the time this was likely a mistake, but I wanted to see how it would break/fail/need more guidance, and this was a definite point of failure.
In [3]:
# Load data.npy and display the head, plus percentage of '1' for each label column
import numpy as np

data_arr = np.load('data.npy', allow_pickle=True)
print("data.npy dtype:", data_arr.dtype)
print("First 5 entries:")
print(data_arr[:5])

# Print percentage of '1' for each label column
label_cols = [col for col in data_arr.dtype.names if col.startswith('label')]
for col in label_cols:
    values = data_arr[col]
    pct_ones = 100.0 * np.sum(values == 1) / len(values)
    print(f"{col}: {pct_ones:.2f}% are 1")
data.npy dtype: [('data', '<U256'), ('label1_training1', '<i4'), ('label2_training1', '<i4'), ('label1_training2', '<i4'), ('label2_training2', '<i4'), ('label1_training3', '<i4'), ('label2_training3', '<i4'), ('label1_training4', '<i4'), ('label2_training4', '<i4'), ('label1_training5', '<i4'), ('label2_training5', '<i4'), ('label1_training6', '<i4'), ('label2_training6', '<i4'), ('label1_training7', '<i4'), ('label2_training7', '<i4'), ('label1_training8', '<i4'), ('label2_training8', '<i4'), ('label1_training9', '<i4'), ('label2_training9', '<i4'), ('label1_training10', '<i4'), ('label2_training10', '<i4')]
First 5 entries:
[('data\\000000.tiff', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
 ('data\\000001.tiff', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
 ('data\\000002.tiff', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
 ('data\\000003.tiff', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
 ('data\\000004.tiff', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)]
label1_training1: 0.06% are 1
label2_training1: 1.04% are 1
label1_training2: 0.00% are 1
label2_training2: 10.41% are 1
label1_training3: 0.00% are 1
label2_training3: 8.01% are 1
label1_training4: 0.00% are 1
label2_training4: 15.91% are 1
label1_training5: 0.00% are 1
label2_training5: 15.53% are 1
label1_training6: 0.00% are 1
label2_training6: 24.86% are 1
label1_training7: 0.00% are 1
label2_training7: 29.37% are 1
label1_training8: 0.00% are 1
label2_training8: 26.50% are 1
label1_training9: 0.00% are 1
label2_training9: 29.27% are 1
label1_training10: 0.00% are 1
label2_training10: 30.95% are 1

initial results¶

This was my first set of checks of the labelling done through this training (looking for 2 labels, starting from random assignments and iterating for 10 rounds of training). The output shows what % of each label in each round is 1, i.e. how it divides the data for that label. Clearly label1 found nothing (or rather, it found something that it believes is the same in every image, so not very useful). label2, however, found something that seems real, and it clearly becomes more confident about finding examples over subsequent iterations.

In [4]:
# Compare each subsequent round of 'label2_training' and display the final round composite image

import importlib
import label_utils
importlib.reload(label_utils)  # pick up any edits to label_utils.py without restarting the kernel
compare_labels = label_utils.compare_labels
show_labels = label_utils.show_labels
import matplotlib.pyplot as plt

# Load data.npy (already loaded as data_arr)
label_cols = [col for col in data_arr.dtype.names if col.startswith('label2_training')]
label_cols_sorted = sorted(label_cols, key=lambda x: int(''.join(filter(str.isdigit, x))))

# Print percentage agreement between each subsequent round
for i in range(len(label_cols_sorted) - 1):
    l1 = data_arr[label_cols_sorted[i]]
    l2 = data_arr[label_cols_sorted[i+1]]
    pct = compare_labels(l1, l2)
    print(f"Agreement between {label_cols_sorted[i]} and {label_cols_sorted[i+1]}: {pct:.2f}%")

# Display composite image for the final round of label2_training
final_label_col = label_cols_sorted[-1]
composite_img = show_labels(data_arr['data'], data_arr[final_label_col], label_value_1=0, label_value_2=1, max_per_group=25)

# Add a vertical gap between the two sets of images
import cv2
gap_width = 30
h = composite_img.shape[0]
w = composite_img.shape[1]
half = w // 2
left = composite_img[:, :half, :]
right = composite_img[:, half:, :]
gap = np.ones((h, gap_width, 3), dtype=np.uint8) * 128
composite_with_gap = np.hstack([left, gap, right])

# Display with larger title, and labels above left and right sides
plt.figure(figsize=(composite_with_gap.shape[1] / 100, composite_with_gap.shape[0] / 100 + 2.5))
plt.imshow(composite_with_gap[..., ::-1])
plt.axis('off')
plt.title(f"Composite for {final_label_col}", fontsize=42, pad=40)

# Add labels above left and right sides
plt.text(0, -20, "label2=0", fontsize=22, fontweight='bold', color='green', va='bottom', ha='left')
plt.text(composite_with_gap.shape[1], -20, "label2=1", fontsize=22, fontweight='bold', color='blue', va='bottom', ha='right')
plt.show()
Agreement between label2_training1 and label2_training2: 90.37%
Agreement between label2_training2 and label2_training3: 96.63%
Agreement between label2_training3 and label2_training4: 92.07%
Agreement between label2_training4 and label2_training5: 97.31%
Agreement between label2_training5 and label2_training6: 90.57%
Agreement between label2_training6 and label2_training7: 94.83%
Agreement between label2_training7 and label2_training8: 96.36%
Agreement between label2_training8 and label2_training9: 96.95%
Agreement between label2_training9 and label2_training10: 97.70%
[Figure: composite image for the final round of label2_training, frames with label2=0 on the left and label2=1 on the right]

detailed results¶

Since label2 was more successful, this looks into it in more detail. First, the text output shows the agreement of the label between consecutive rounds, that is, how it is converging between rounds. I'm pleasantly surprised by this, as I wasn't sure there would be convergence.

The second output is the image, which shows the results for label2 in the final round of labelling. In a way this is a nice result: it is clear there is a somewhat reliable difference between the left and right sides. However, I am not sure I could confidently say what the difference is, so overall I am not happy with this.

[both results use code I wrote for providing feedback on this and future rounds, found in label_utils.py]

Final thoughts and summary¶

results¶

In terms of the actual results of my approach, I'm obviously not overjoyed; however, there does seem to be some evidence of potential merit in the approach. As far as experimentation goes, this has been super useful: I'm learning a lot and having a good time doing it.

thoughts on copilot¶

As this was my first time using an LLM specifically tailored towards writing code, I've spent some time getting down my thoughts about it, included here.

  • it is unquestionably powerful; it easily spits out a function that would take me 5-10 minutes (more if I don't know that area of coding very well, for instance PyTorch here)
  • it reminds me at times of the 'write instructions for making a PB+J sandwich' problem, as it will very happily misunderstand you
  • it is definitely possible to unspool a lot of code in not much time; as a result, I think a lot of discipline will be required to keep things clean and refactor
  • I absolutely see the possibility for this to make writing code less about line-to-line coding and more about design and thinking about overall structure. I like this, as I love that part of the process.
  • there is a big danger in moving into unknown areas: I was able to train a model in PyTorch to label images without looking up a single thing. This requires further discipline to make sure you gain understanding, though it does mean that understanding isn't an immediate roadblock (so I could delay doing that learning and still progress). Since finishing this section I spent a few hours checking what it did, understanding it and re-planning. In general it did a bad job of what I intended; most glaringly, it completely removed spatial information from the training.
  • the ease of use makes it very easy to run with ideas before they are fully formed or have been thought through (for instance allowing some idiot like yours truly to completely max out his memory, or try training in a way that doesn't totally make sense)
  • for visual changes it's often better to just take control yourself; it doesn't do a good job of improving visualisations based on direction, presumably because it can't see/understand some of the concepts behind them

next steps¶

I'm excited to continue with this. I have a whole list of next steps, but some of the things I want to include immediately in section 2 are:

  • update the NN model with what I've learned: keep spatial information, do a better job training, and better utilise the probabilities of my classifier (a rough sketch of that direction follows this list)
  • make the process more iterative (attempt to learn one label at a time)
  • keep using Copilot, but interact with its output code more directly (and hopefully continue to learn its limitations/how best to use it)
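Purely as a sketch of that first point (nothing here is implemented yet, and the layer sizes and class name are placeholders), a classifier that keeps spatial information and exposes a per-frame probability for a single label might look something like:

import torch
import torch.nn as nn

# Sketch of a small CNN for section 2: keeps spatial structure instead of
# flattening the frame into a bag of pixels, and outputs a probability for
# one binary label at a time.
class FrameClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(4),
            nn.Conv2d(8, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(4),
            nn.AdaptiveAvgPool2d(1),                  # works for any frame size
        )
        self.head = nn.Linear(16, 1)

    def forward(self, x):                             # x: (batch, 1, H, W)
        logits = self.head(self.features(x).flatten(1))
        return torch.sigmoid(logits).squeeze(1)       # per-frame probability

# e.g. FrameClassifier()(torch.randn(4, 1, 240, 320)) -> tensor of shape (4,)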