# See link above for g_invariance.py
from g_invariance import *

# Define 90-rotation function.
def rotate_array(array, degree):
    if (array.ndim == 2 and degree % 90 == 0):
        return onp.rot90(array, degree // 90)
    else:
        print("Can only rorate 2D array by 90n degrees.")
        raise RuntimeError


# Define average of 4 rotations (0, 90, 180, 270) function.
def averaged_four_rotations(array):
    buf = []
    for degree in (0, 90, 180, 270):
        buf.append(rotate_array(array, degree))
    return sum(buf) / len(buf)


# Define number of CPUs to use for subspace generation.
NUM_TRANSFORM_CPUS = 4

#Important variables
#  - transform_mat is out Reynods operator \bar{T}
#  - null_eigenvectors are the eigenvectors of \bar{T}

# Apply given transformation on onehot corresponding to each pixel of 2D image.
# Use Python multiprocessing since it is work with numpy array.
# If it is torch Tensor, it is recommended to use PyTorch wrapped version of
# multiprocessing (torch.multiprocessing).
def get_invariant_subspace(transform_func, shape):
    # Use multiprocessing to get transformation matrix correspond to given
    # transformation function by paralleling onehot dimensions.
    numel = onp.prod(shape)
    pool = mp.Pool(NUM_TRANSFORM_CPUS)
    transformed_buf = pool.map(
        partial(
            transformed_onehot,
            transform_func=transform_func, shape=shape,
        ),
        range(numel),
    )
    pool.close()
    pool.join()
    
    # Form a single Reynods operator matrix from all results
    transform_mat = onp.stack(transformed_buf,axis=1)

    # Eigenvectors is used to describe the transformation subspace.
    # Use right side of decomposition thus eigenvectors are located at rows.
    # Transformation matrix is assumed to be real symmetric, thus Hermitian is
    # used for optimization.
    _, eigenvalues, eigenvectors = onp.linalg.svd(transform_mat,hermitian=True)

    # Eigenvalues are sorted from high to low, thus for over rank eigenvectors,
    # they are null eigenvectors assigned with 0 eigenvalues.
    # We should focus on non-trival eigenvectors.
    rank = onp.linalg.matrix_rank(onp.diag(eigenvalues),hermitian=True)
    
    eigenvectors = eigenvectors[:rank]
    return transform_mat, eigenvectors.T


transform_mat, eigenvectors = get_invariant_subspace(averaged_four_rotations, (height, width))

print(f"The Reynolds operator tranformation matrix has dimension {transform_mat.shape}")
print(f"The subspace W is the subspace span by {eigenvectors.shape[1]} eigenvectors")
print("--- First 16 eigenvectors shown as images ---")
showSubspace(eigenvectors, (height, width), ndim=16, channels=False)

The Reynolds operator tranformation matrix has dimension (784, 784)
The subspace W is the subspace span by 196 eigenvectors
--- First 16 eigenvectors shown as images ---


import torch.nn as nn

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        
        # We will later replace the first layer with a G-invariant layer.
        # Rest of the code remains the same.
        self.first_layer = nn.Linear(784, 50)
        
        self.mlp = nn.Sequential(
                self.first_layer,
                nn.ReLU(),
                nn.Linear(50, 10)
        )
                
    def forward(self, X):
        out = self.mlp(X)
        return out
        
model = MLP()


# Standard MLP with 1 hidden layer.
rng = torch.Generator("cpu")
rng.manual_seed(0)

# Use Pytorch's pre-defined SGD optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=LR)

best_acc = 0
best_state_dict = copy.deepcopy(model.state_dict())
print("=====", "=" * 8)
print("Epoch", "Train Accuracy")
print("-----", "-" * 8)
for epc in range(NUM_EPOCHS):
    # Creates a mini-batch
    for batch_cnt, batch_itr in enumerate(
        torch.randperm(
            len(train_input),
            generator=rng,
        )[0:NUM_SAMPLES].view(
            NUM_BATCHES, BATCH_SIZE,
        ),
    ):
        # Zero gradients of optimize
        optimizer.zero_grad()
        # Forward pass of model
        train_output = model(train_input[batch_itr])
        # Define loss
        train_loss = torch.nn.functional.cross_entropy(train_output, train_target[batch_itr])
        # Backprop
        train_loss.backward()
        # One gradient step
        optimizer.step()
        
    valid_acc = accuracy(model, valid_input, valid_target)
    if (valid_acc > best_acc):
        print("{:5d} {:.6f}".format(epc + 1, valid_acc))
        best_acc = valid_acc
        best_state_dict = copy.deepcopy(model.state_dict())
    elif ((epc + 1) % 100 == 0):
        print("{:5d} {:.6f}".format(epc + 1, valid_acc))
    else:
        pass
print("=====", "=" * 8)
model.load_state_dict(best_state_dict)
train_acc = accuracy(model, train_input, train_target)
valid_acc = accuracy(model, valid_input, valid_target)
test_acc = accuracy(model, test_input, test_target)
aug_test_acc = accuracy(model, aug_test_input, aug_test_target)
print("=====", "=" * 8)
print("Data ", "\tAccuracy")
print("-----", "-" * 8)
print("Original Train", "{:.6f}".format(train_acc))
print("Original Valid", "{:.6f}".format(valid_acc))
print("Original Test ", "{:.6f}".format(test_acc))
print("Rotated  Test ", "{:.6f}".format(aug_test_acc))
print("=====", "=" * 8)

===== ========
Epoch Train Accuracy
----- --------
    1 0.837191
    2 0.881836
    3 0.895008
    4 0.904400
    5 0.909238
    6 0.912992
    7 0.914837
    8 0.918475
    9 0.921252
   10 0.924050
   12 0.925555
   13 0.929452
   15 0.931796
   17 0.932742
   18 0.932750
   20 0.933319
   21 0.934537
   22 0.936521
   26 0.937423
   27 0.938605
   30 0.938694
   31 0.941817
   37 0.942573
   53 0.943196
   61 0.944435
   66 0.944688
   85 0.945852
  100 0.945048
===== ========
===== ========
Data  	Accuracy
----- --------
Original Train 0.986855
Original Valid 0.945852
Original Test  0.947650
Rotated  Test  0.372948
===== ========


import math
eigentensor = torch.from_numpy(eigenvectors).float()

class GInvariantLayer(nn.Module):
    
    def __init__(self, input_dim, output_dim):
        super().__init__()
        
        # Load the left 1-eigenvectors of the Reynolds operator that we computed before.
        self.basis = eigentensor.T
        assert self.basis.shape[1] == input_dim
        
        self.coeffs = nn.Parameter(torch.Tensor(output_dim, self.basis.shape[0], 1))
        self.bias = nn.Parameter(torch.Tensor(output_dim))
        
        stdv = 1.0/math.sqrt(output_dim)
        self.coeffs.data.uniform_(-stdv, stdv)
        self.bias.data.zero_()
    

    def forward(self, X):
        # Input shape: torch.Size([minibatch, input_dim])
        
        if self.basis.device != X.device:
            self.basis = self.basis.to(X.device)

        # Construct weight w \in \mathcal{W} (the left 1-eigenspace)
        #      using the current learnable coefficients.
        # coeffs: (output_dim, n_basis, 1)
        # basis  : (n_basis, input_dim)
        # result after torch.mul : (output_dim, n_basis, input_dim)
        # result after sum : (output_dim, input_dim)
        weights = torch.mul(self.coeffs, self.basis)
        weights = weights.sum(dim=-2)

        # Output shape: torch.Size([minibatch, output_dim])
        out = X @ weights.T + self.bias

        return out


class GInvariantMLP(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Replace the first layer with a G-invariant layer.
        self.first_layer = GInvariantLayer(784, 50)
        
        self.mlp = nn.Sequential(
                self.first_layer,
                nn.ReLU(),
                nn.Linear(50, 10)
        )
                
    def forward(self, X):
        out = self.mlp(X)
        return out

        
model = GInvariantMLP()


# G-invariant MLP with 1 hidden layer.
rng = torch.Generator("cpu")
rng.manual_seed(0)

# Use Pytorch's pre-defined SGD optimizer
optimizer = torch.optim.SGD(model.parameters(),lr=LR)
best_acc = 0
best_state_dict = copy.deepcopy(model.state_dict())
print("=====", "=" * 8)
print("Epoch", "Train Accuracy")
print("-----", "-" * 8)
linear_transform = torch.Tensor(transform_mat.T)
for epc in range(NUM_EPOCHS):
    for batch_cnt, batch_itr in enumerate(
        torch.randperm(
            len(train_input),
            generator=rng,
        )[0:NUM_SAMPLES].view(
            NUM_BATCHES, BATCH_SIZE,
        ),
    ):
        optimizer.zero_grad()
        train_output = model.forward(
            train_input[batch_itr],
        )
        train_loss = torch.nn.functional.cross_entropy(
            train_output, train_target[batch_itr],
        )
        train_loss.backward()
        optimizer.step()
    valid_acc = accuracy(
        model,
        valid_input,
        valid_target,
    )
    if (valid_acc > best_acc):
        print("{:5d} {:.6f}".format(epc + 1, valid_acc))
        best_acc = valid_acc
        best_state_dict = copy.deepcopy(model.state_dict())
    elif ((epc + 1) % 50 == 0):
        print("{:5d} {:.6f}".format(epc + 1, valid_acc))
    else:
        pass
print("=====", "=" * 8)
model.load_state_dict(best_state_dict)
train_acc = accuracy(
    model,
    train_input,
    train_target,
)
valid_acc = accuracy(
    model,
    valid_input,
    valid_target,
)
test_acc = accuracy(
    model,
    test_input,
    test_target,
)
aug_test_acc = accuracy(
    model,
    aug_test_input,
    aug_test_target,
)
print("=====", "=" * 8)
print("Data  ", "Accuracy")
print("-----", "-" * 8)
print("Original Train", "{:.6f}".format(train_acc))
print("Original Valid", "{:.6f}".format(valid_acc))
print("Original Test ", "{:.6f}".format(test_acc))
print("Rotated  Test  ", "{:.6f}".format(aug_test_acc))
print("=====", "=" * 8)

===== ========
Epoch Train Accuracy
----- --------
    1 0.308690
    2 0.390732
    3 0.473029
    4 0.488353
    5 0.515366
    6 0.522194
    7 0.528756
    8 0.564369
    9 0.580975
   10 0.585732
   12 0.587812
   13 0.601295
   16 0.620082
   18 0.621791
   20 0.625342
   22 0.631214
   23 0.638681
   24 0.643150
   27 0.652897
   30 0.654865
   31 0.659390
   35 0.667046
   36 0.669251
   37 0.669282
   41 0.676899
   44 0.681269
   50 0.677041
   55 0.682113
   56 0.686409
   57 0.688299
   61 0.690130
   77 0.692890
   85 0.695984
   90 0.696916
   96 0.697655
  100 0.695011
===== ========
===== ========
Data   Accuracy
----- --------
Original Train 0.713258
Original Valid 0.697655
Original Test  0.709855
Rotated  Test   0.709855
===== ========

CS69000DPL - Deep Learning¶

Spring 2023¶

Instructor and Copyright: Bruno Ribeiro with help by S. Chandra Mouli¶

Learning G-Invariant Representations¶

Reading assigment for this lecture:¶

Understanding Symmetries¶

Definition of a Group¶

Extra Properties¶

General Linear Transformation Group of Images¶

An important group: The permutation group¶

Applications of the Symmetric Group¶

Common Mistake¶

Group Representations¶

Group-Invariant ($G$-Invariant) Transformation¶

Group-Equivaviant ($G$-Equivariant) Transformation¶

$G$-Invariant Representations¶

First: Understading Invariant Subspaces¶

Second: Reynods Operator Eigenvalues¶

$G$-Invariant Neuron¶

$G$-Invariant Neuron¶

Example¶

First: Get the eigenvectors of the Reynolds operator¶

Eigenvectors of the Reynolds Operator¶

Standard MLP¶

G-Invariant MLP¶

G-equivariant Representations¶

Group-Equivariant ($G$-Equivariant) Neurons¶

Lie Groups (Optional)¶