Basics of building an MLP

import torch
import torch.nn as nn
# Performs the operation Ax + b, where the weight A and bias b are initialized randomly
linear = nn.Linear(10, 2)  # nn.Linear(input dim=10, output dim=2) takes an n x 10 matrix and returns an n x 2 matrix
example_input = torch.randn(3, 10)
example_output = linear(example_input)
example_output
tensor([[ 0.5458,  0.1715],
        [ 0.0310, -0.2061],
        [ 1.3804, -0.1380]], grad_fn=<AddmmBackward>)
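As a quick sanity check (added here, not part of the original notebook), the same output can be reproduced by hand from the layer's attributes: nn.Linear stores a weight of shape (output dim, input dim) and a bias vector, and computes x @ weight.T + bias.

manual_output = example_input @ linear.weight.T + linear.bias
print(torch.allclose(manual_output, example_output))  # True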
# ReLU non-linearity sets all negative numbers in a tensor to zero
relu = nn.ReLU()
relu_output = relu(example_output)
relu_output
tensor([[0.5458, 0.1715],
        [0.0310, 0.0000],
        [1.3804, 0.0000]], grad_fn=<ReluBackward0>)
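ReLU has no learnable parameters, so the module form is interchangeable with the functional form. A small equivalence check (an added sketch, assuming the tensors above are still in scope):

import torch.nn.functional as F
print(torch.allclose(relu_output, F.relu(example_output)))       # True
print(torch.allclose(relu_output, example_output.clamp(min=0)))  # True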
# Rescale each of the 2 features across a batch of n inputs to zero mean and unit variance (plus a learnable scale and shift)
batchnorm = nn.BatchNorm1d(2)
batchnorm_output = batchnorm(relu_output)
batchnorm_output
tensor([[-0.1917,  1.4131],
        [-1.1176, -0.7066],
        [ 1.3093, -0.7066]], grad_fn=<NativeBatchNormBackward>)
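In training mode, BatchNorm1d normalizes each feature column using the batch's own (biased) mean and variance, then applies a learnable scale and shift initialized to 1 and 0. A quick check of that claim (added here, not from the original):

print(batchnorm_output.mean(dim=0))                 # close to 0 for each feature
print(batchnorm_output.std(dim=0, unbiased=False))  # close to 1 for each feature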
# Chain all three layers into a single module with nn.Sequential
mlp_layer = nn.Sequential(
    nn.Linear(5, 2),
    nn.ReLU(),
    nn.BatchNorm1d(2)
)

test_example = torch.randn(5, 5)
print("input: ")
print(test_example)
print("output: ")
print(mlp_layer(test_example))
input: 
tensor([[ 1.4355,  0.1143,  0.0974,  0.2137,  1.9092],
        [-0.7489, -1.7915,  0.3816, -0.0109,  1.3555],
        [-1.8546, -2.9966,  0.5250, -0.4175,  1.0728],
        [ 0.5806, -0.6192, -0.0937, -1.3968,  0.8309],
        [-1.2107, -0.3949,  0.8021, -0.7953, -0.5259]])
output: 
tensor([[-0.8080,  1.9998],
        [-0.8080, -0.5000],
        [-0.8080, -0.5000],
        [ 0.9855, -0.5000],
        [ 1.4384, -0.5000]], grad_fn=<NativeBatchNormBackward>)
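nn.Sequential is just an ordered container, so its layers can be indexed and inspected individually. A small illustration (hypothetical inspection code, not part of the original):

print(mlp_layer[0])               # Linear(in_features=5, out_features=2, bias=True)
print(mlp_layer[0].weight.shape)  # torch.Size([2, 5])
num_params = sum(p.numel() for p in mlp_layer.parameters())
print(num_params)                 # 12 (Linear) + 4 (BatchNorm1d) = 16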
# Optimizer: Adam with a (deliberately large) learning rate of 0.1
import torch.optim as optim
adam_opt = optim.Adam(mlp_layer.parameters(), lr=1e-1)

# Toy training data: 100 examples with 5 features each, shifted to have mean 1
train_example = torch.randn(100, 5) + 1

for i in range(10):
    adam_opt.zero_grad()  # clear gradients from the previous step
    # Loss: mean absolute distance of the outputs from 1
    cur_loss = torch.abs(1 - mlp_layer(train_example)).mean()
    cur_loss.backward()   # backpropagate to compute gradients
    adam_opt.step()       # update the parameters
    print(cur_loss)
tensor(0.5653, grad_fn=<MeanBackward0>)
tensor(0.4999, grad_fn=<MeanBackward0>)
tensor(0.4354, grad_fn=<MeanBackward0>)
tensor(0.3699, grad_fn=<MeanBackward0>)
tensor(0.3009, grad_fn=<MeanBackward0>)
tensor(0.2271, grad_fn=<MeanBackward0>)
tensor(0.1474, grad_fn=<MeanBackward0>)
tensor(0.0749, grad_fn=<MeanBackward0>)
tensor(0.0687, grad_fn=<MeanBackward0>)
tensor(0.1651, grad_fn=<MeanBackward0>)
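Exact loss values will differ from run to run, since both the weights and the data are sampled randomly. Once training is done, switch the model to evaluation mode so BatchNorm1d uses its running statistics instead of per-batch statistics, and disable gradient tracking during inference. A minimal sketch (assuming mlp_layer is the trained module above):

mlp_layer.eval()       # BatchNorm now uses running mean/variance
with torch.no_grad():  # no gradient bookkeeping needed for inference
    print(mlp_layer(torch.randn(4, 5)))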