Multi-layer perceptrons¶
So far, we have been using logistic regression for all our classification needs. Logistic regression is very similar to linear regression, except for the $\sigma(z)$ at the end: it is essentially a linear projection followed by a choice between the "positive" and the "negative" sides of the projection surface.
Also, we have seen that we can choose to project our data $X$ into an intermediate representation $z$ so that $z$ has more than one dimension. We can use that for multi-class classification.
Now, we are going to look at the effect of mapping the intermediate projection $z$ to yet another intermediate projection (let's call it $z_2$). As we will see, increasing the dimensionality of each representation $z_i$ and the number of intermediate projections creates regions of the input space in which we can apply separate linear transformations. For a theoretical reference, see: Thiago Serra, Christian Tjandraatmadja, Srikumar Ramalingam. "Bounding and Counting Linear Regions of Deep Neural Networks". Proceedings of the 35th International Conference on Machine Learning, PMLR 80:4558-4566, 2018.
In the examples shown here, we will start with a plain linear model (linear regression) to show some of its limitations. These animated examples work like this:
- We define a random dataset $X$
- We define a target $y$ by applying some function over $X$
- We initialize a prediction model with an identity function (that is, the weight matrix is the identity and the bias is zero, so the initial prediction is $\hat{y}_0 = X$)
- We train the prediction model to predict $\hat{y} = f(X)$ using gradient descent, and store $\hat{y}_t$ for each iteration $t$
- We make an animation of all $\hat{y}_t$ so we can see what happens to our predictions.
import torch
import torch.nn as nn
from tqdm import tqdm
import pandas as pd
torch.manual_seed(20)
def train_model(model, X, y, lr=0.01, epochs=100):
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# Training loop
outputs = [X.numpy()]
for epoch in tqdm(range(epochs)):
# Forward pass
predictions = model(X)
outputs.append(predictions.detach().numpy())
loss = criterion(predictions, y)
# Backward pass and optimization
optimizer.zero_grad()
loss.backward()
optimizer.step()
return model, outputs
def animate_training(outputs, y, frame_duration=5, title='Training Animation'):
import pandas as pd
import plotly.express as px
# Convert outputs to a pandas DataFrame for easier plotting
frames = []
for i, output in enumerate(outputs):
df = pd.DataFrame(output, columns=['Feature 1', 'Feature 2'])
df['Frame'] = i # Add a frame identifier
frames.append(df)
total_frames = len(frames)
    n = max(total_frames // 100, 1) # Keep roughly 100 frames in the animation
    frames = frames[::n] # Get every nth frame
# Concatenate all frames into a single DataFrame
animated_df = pd.concat(frames, ignore_index=True)
fig = px.scatter(
animated_df,
width=600,
height=600,
x='Feature 1',
y='Feature 2',
animation_frame='Frame',
title=title,
labels={'Feature 1': 'Feature 1', 'Feature 2': 'Feature 2'}
)
# Add a scatterplot of y
scatter_y = pd.DataFrame(y.numpy(), columns=['Feature 1', 'Feature 2'])
scatter_y['Frame'] = -1 # Use -1 to indicate the original data
for _, row in scatter_y.iterrows():
fig.add_trace(px.scatter(
pd.DataFrame([row]),
x='Feature 1',
y='Feature 2',
color='Frame',
).data[0])
# Adjust animation speed
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = frame_duration # Set duration in milliseconds
fig.update_layout(coloraxis_showscale=False)
return fig
A simple dataset: a rotation + translation¶
A linear regression is capable of finding the correct rotation and translation of a dataset. This is because rotations and translations can be expressed directly by the linear prediction equation $y = xw^T + b$: in this case, the weight matrix $w$ can be constructed from a rotation, and $b$ corresponds to the translation:
# Create a mock dataset
X = torch.randn(100, 2) * 5 # 100 samples, 2 features
theta = torch.tensor(30.0 * torch.pi / 180.0) # Convert degrees to radians
rotation_matrix = torch.tensor([
[torch.cos(theta), -torch.sin(theta)],
[torch.sin(theta), torch.cos(theta)]
])
y = X @ rotation_matrix + torch.tensor([5,5]) + 0.01 * torch.randn(100, 2) # y = rotation(X) + [5, 5] + noise
# Initialize the model, loss function, and optimizer
input_size = 2
output_size = 2
linear_model = nn.Linear(
in_features=2,
out_features=2,
)
linear_model.weight.data = torch.eye(2) # Initializing with identity
linear_model.bias.data = torch.zeros(2) # Initializing bias to zero
model, outputs = train_model(
model=linear_model,
X=X,
y=y,
lr=0.01,
epochs=1000
)
fig = animate_training(outputs, y, title='Linear data, linear model')
fig.show()
100%|██████████| 1000/1000 [00:03<00:00, 257.16it/s]
A more complicated dataset: linear by parts¶
Now, let's build a more complicated dataset. This time, $X$ (our input) will have three different clusters, and we will apply a different linear transform to each cluster.
# Create a mock dataset
X1 = torch.randn(100, 2) + torch.tensor([3,-3]) # 100 samples, 2 features
y1 = 3*X1 + 0.01 * torch.randn(100, 2) # y1 = 3*X1 + noise
# Create a mock dataset
X2 = torch.randn(100, 2) # 100 samples, 2 features
theta = torch.tensor(150.0 * torch.pi / 180.0) # Convert degrees to radians
rotation_matrix2 = torch.tensor([
[torch.cos(theta), -torch.sin(theta)],
[torch.sin(theta), torch.cos(theta)]
])
y2 = (X2 @ rotation_matrix2) + torch.tensor([-3,3]) + 0.01 * torch.randn(100, 2) # y2 = rotation(X2) + [-3, 3] + noise
# Create a mock dataset
X3 = torch.randn(100, 2) + torch.tensor([3,3]) # 100 samples, 2 features
y3 = -5*X3 - 0.01 * torch.randn(100, 2) # y3 = -5*X3 + noise
X = torch.cat((X1, X2, X3), dim=0)
y = torch.cat((y1, y2, y3), dim=0)
When we try to approximate this using a single linear layer, we obviously can't: the data is more complicated than the model, or, in other words, the model is not expressive enough to fit the data. In the animation, we clearly see that the linear layer can only apply the same transform to all points of the input vector space, hence they all "bend" in the same way.
Our model is unable, for example, to model the different cluster variances generated by the different multiplications applied when we generated each part of $y$.
# Initialize the model, loss function, and optimizer
input_size = 2
output_size = 2
linear_model = nn.Linear(
in_features=2,
out_features=2,
)
linear_model.weight.data = torch.eye(2) # Initializing with identity
linear_model.bias.data = torch.zeros(2) # Initializing bias to zero
model, outputs = train_model(
model=linear_model,
X=X,
y=y,
lr=0.01,
epochs=500
)
fig = animate_training(outputs, y, title='Linear by parts data, linear model')
fig.show()
100%|██████████| 500/500 [00:00<00:00, 2659.77it/s]
Exercise
Why did we get perfectly fitting points in the first example, but we were unable to get perfectly fitting points in the second example?
In the first example, the data was generated using the exact same model family as the one we used for prediction. This means that training was able to find the exact model used for data generation. However, in the second example, the data was generated using a model with more degrees of freedom (or: a more expressive model) than the one we used for prediction. As a consequence, training finds an approximation that minimizes the MSE, but cannot find an exact match. This is the same behavior as trying to fit a linear model ($y=ax+b$) to a polynomial or an exponential curve.
Multi Layer Perceptron (MLP) models¶
A possible upgrade to the linear model is the MLP model. The MLP model is:
$$ \hat{y} = f(xw_1^T+b_1)w_2^T+b_2, $$ where $f$ is the Rectified Linear Unit (ReLU) function, given by $f(z)=\max(0,z)$; that is, $f(z)=0$ for $z<0$ and $f(z)=z$ for $z \geq 0$.
We can interpret this equation as two layers of linear projections, separated by a non-linear operation, that is:
$$ \hat{y} = (xw_1^T+b_1) \circ f(\cdot) \circ (xw_2^T+b_2), $$ where $\circ$ denotes function composition, applied from left to right (the output of each block is fed to the next).
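As a quick illustration, this computation can be written directly with tensor operations. The shapes below (2 inputs, 3 hidden units, 2 outputs) are arbitrary choices for this sketch, not the values used in the experiments later on:
import torch
# Two-layer MLP written out explicitly: y_hat = f(x w1^T + b1) w2^T + b2
x = torch.randn(5, 2)                       # batch of 5 samples, 2 features
w1, b1 = torch.randn(3, 2), torch.zeros(3)  # first projection: 2 -> 3
w2, b2 = torch.randn(2, 3), torch.zeros(2)  # second projection: 3 -> 2
h = torch.relu(x @ w1.T + b1)               # non-linearity between the projections
y_hat = h @ w2.T + b2
print(y_hat.shape)                          # torch.Size([5, 2])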
We can draw it like this:
Why MLP? A small example¶
Let's suppose we have two 1-dimensional inputs: $x_1$ and $-x_2$, where $x_1$ and $x_2$ are real and positive - that is, our input batch is $x = \begin{bmatrix} x_1 \\ -x_2 \end{bmatrix}$. Our network has 1-dimensional outputs as well.
For simplicity, let's assume $b_1=b_2=0$
$w_1$ will be equal to $\begin{bmatrix} 1 \\ -1 \end{bmatrix}$. Thus, $xw_1^T=\begin{bmatrix} x_1 & -x_1 \\ -x_2 & x_2 \end{bmatrix}$.
Now, after applying $f(.)$ to $xw_1^T$, we have:
$$ f(xw_1^T)=\begin{bmatrix} x_1 & 0 \\ 0 & x_2 \end{bmatrix} $$
This is important: each row now depends on only one of the inputs - each input sample has been routed to a different hidden unit!
Now, note that $w_2$ must map the two hidden units to a single output. Let's say $w_2 = \begin{bmatrix} c & d \end{bmatrix}$, so that $w_2^T$ is a $2 \times 1$ column, and:
$$ y = \begin{bmatrix} x_1 & 0 \\ 0 & x_2 \end{bmatrix} \begin{bmatrix} c \\ d \end{bmatrix} = \begin{bmatrix} c x_1 \\ d x_2 \end{bmatrix} $$
Now, importantly: our model mapped the first input, $x_1$, to $c x_1$, and the second input, $-x_2$, to $d x_2$ - each input received its own scaling.
Thus, the model operates in two layers. In the first layer, it divides the inputs into groups; in the second layer, it applies a different linear transform for each group.
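We can check this reasoning numerically. The concrete values below ($x_1=2$, $x_2=3$, $c=10$, $d=-10$) are arbitrary choices for illustration:
import torch
# Numerical check of the small example above
x = torch.tensor([[2.0], [-3.0]])    # two 1-d inputs: x1 = 2 and -x2 = -3
w1 = torch.tensor([[1.0], [-1.0]])   # first weight matrix (2 hidden units, 1 input)
w2 = torch.tensor([[10.0, -10.0]])   # second weight matrix [c, d] (1 output, 2 hidden units)
h = torch.relu(x @ w1.T)             # [[x1, 0], [0, x2]]
y = h @ w2.T                         # [[c*x1], [d*x2]]
print(h)                             # tensor([[2., 0.], [0., 3.]])
print(y)                             # tensor([[ 20.], [-30.]])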
Exercise
Ok, now do it yourself.
Assume:
$$ x = \begin{bmatrix} 1 & 1 \\ 2 & 2 \\ -1 & -1 \\ -2 & -2 \end{bmatrix} $$
Assume your first weight matrix is:
$$ w_1 = \begin{bmatrix} 1 & 1 \\ -1 & -1 \end{bmatrix} $$
First, calculate $z_1 = x w_1^T$.
Answer here
$$ z_1 = \begin{bmatrix} 2 & -2 \\ 4 & -4 \\ -2 & 2 \\ -4 & 4 \end{bmatrix} $$
Then, calculate $y_1 = \text{ReLU}(z_1)$
Answer here
$$ y_1 = \begin{bmatrix} 2 & 0 \\ 4 & 0 \\ 0 & 2 \\ 0 & 4 \end{bmatrix} $$

Now, assume that:
$$ w_2 = \begin{bmatrix} 1 & 1 \end{bmatrix} $$
Calculate $z_2 = y_1 w_2^T$
Answer here
$$ z_2 = \begin{bmatrix} 2 \\ 4 \\ 2 \\ 4 \end{bmatrix} $$

What happened to the first and second data points? What happened to the third and fourth data points? Can we do that using a simple linear transformation?
Answer here
We applied an identity-like transform (the sum of the coordinates) to data points 1 and 2, and the same transform multiplied by -1 to data points 3 and 4. This is impossible with a single linear transformation.

So, what is the role of the non-linearity that separates the layers in our network?
Answer here
Non-linearities allow us to divide our vector space into regions, and then apply a different linear transformation in each region!

The Vanishing Gradient problem¶
ReLU is a powerful non-linearity, but it has an inherent problem.
Exercise
Assume:
$$ x = \begin{bmatrix}1 & 1\end{bmatrix}, $$
$b=-5$
and
$$ w = \begin{bmatrix}1 & 1\end{bmatrix} $$
In this case, what is the value of $z = f(xw^T+b)$, where $f$ is the ReLU function?
Answer here
$z = f(1 + 1 - 5) = f(-3) = 0$

What are the values of $\frac{dz}{dx_1}$ and $\frac{dz}{dx_2}$?
Answer here
In these conditions, $\frac{dz}{dx_1}=\frac{dz}{dx_2}=0$, because ReLU has zero derivative for negative pre-activations.

Why is this harmful to gradient descent?
Answer here
When we apply the chain rule, the derivative of the error with respect to $z$ is multiplied by the derivative of $z$ with respect to $x$. However, this second factor is zero, hence the gradient of the error with respect to $x$ (and with respect to the parameters $w$ and $b$) is also zero - gradient descent cannot update these parameters.

Visualizing the vanishing gradient¶
The values of the weight and bias matrices must be trained using gradient descent. However, $f(\cdot)$ has zero gradient for all negative inputs. For this reason, it is common to see output values collapsing onto straight lines, located exactly at the ReLU's breakpoint (the kink at zero).
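Before training the full model, we can reproduce the zero-gradient situation from the exercise above with autograd, using the same numbers:
import torch
# x = [1, 1], w = [1, 1], b = -5, so the pre-activation x w^T + b = -3 is negative
x = torch.tensor([[1.0, 1.0]], requires_grad=True)
w = torch.tensor([[1.0, 1.0]])
b = torch.tensor([-5.0])
z = torch.relu(x @ w.T + b)  # ReLU output is 0 because the pre-activation is negative
z.sum().backward()
print(z)                     # tensor([[0.]], grad_fn=...)
print(x.grad)                # tensor([[0., 0.]]) -- the gradient vanished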
class MLP(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MLP, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, output_size)
self.nonlinearity = nn.ReLU()
# Initialize weights to identity and biases to zero
nn.init.eye_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
nn.init.eye_(self.fc2.weight)
nn.init.zeros_(self.fc2.bias)
def forward(self, x):
x = self.nonlinearity(self.fc1(x))
x = self.fc2(x)
return x
# Create an instance of the MLP
hidden_size = 2 # Example hidden layer size
mlp_model = MLP(input_size, hidden_size, output_size)
model, outputs = train_model(
model=mlp_model,
X=X,
y=y,
lr=0.01,
epochs=5000
)
fig = animate_training(outputs, y, frame_duration=1, title='Linear by parts data, MLP model')
fig.show()
100%|██████████| 5000/5000 [00:02<00:00, 1798.35it/s]
Residual blocks¶
The zero-gradient problem has been tackled by many approaches. One of the most successful was to create an alternate route through which gradients can propagate. This route is called "residual", and involves adding the input to the output of the network, that is:
$$ \hat{y} = x + \left(f(xw_1^T+b_1)w_2^T+b_2\right), $$
or, using function composition:
$$ \hat{y} = x + \left((xw_1^T+b_1) \circ f(\cdot) \circ (xw_2^T+b_2)\right), $$ where $\circ$ denotes function composition, applied from left to right, as before.
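We can repeat the zero-gradient check from the previous section, now with a residual connection added. The weights below are arbitrary illustrative values chosen so that the ReLU branch stays "dead", as in the exercise:
import torch
# Residual version: y = x + f(x w1^T + b1) w2^T (second-layer bias set to zero)
x = torch.tensor([[1.0, 1.0]], requires_grad=True)
w1 = torch.tensor([[1.0, 1.0], [1.0, 1.0]])  # 2 -> 2, so shapes allow adding the input back
b1 = torch.tensor([-5.0, -5.0])              # negative enough to zero out the ReLU
w2 = torch.eye(2)
y = x + torch.relu(x @ w1.T + b1) @ w2.T     # the ReLU branch outputs zeros...
y.sum().backward()
print(x.grad)                                # tensor([[1., 1.]]) -- ...but gradient still flows through the residual path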
class ResidualBlock(nn.Module):
def __init__(self, hidden_size):
super().__init__()
self.fc1 = nn.Linear(hidden_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.nonlinearity = nn.ReLU()
# Initialize weights to identity and biases to zero
nn.init.eye_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
nn.init.eye_(self.fc2.weight)
nn.init.zeros_(self.fc2.bias)
def forward(self, x):
residual = x
x = self.nonlinearity(self.fc1(x))
x = self.fc2(x)
x += residual # Add the residual connection
return x
class MLP(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MLP, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.r1 = ResidualBlock(hidden_size)
self.r2 = ResidualBlock(hidden_size)
self.fc2 = nn.Linear(hidden_size, output_size)
# Initialize weights to identity and biases to zero
nn.init.eye_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
nn.init.eye_(self.fc2.weight)
nn.init.zeros_(self.fc2.bias)
def forward(self, x):
x = self.fc1(x)
x = self.r1(x)
x = self.r2(x)
y = self.fc2(x)
return y
# Create an instance of the MLP
hidden_size = 2 # Example hidden layer size
mlp_model = MLP(input_size, hidden_size, output_size)
model, outputs = train_model(
model=mlp_model,
X=X,
y=y,
lr=0.01,
epochs=5000
)
fig = animate_training(outputs, y, frame_duration=1, title='Linear by parts data, MLP model with residual propagation')
fig.show()
100%|██████████| 5000/5000 [00:05<00:00, 931.68it/s]
Normalization¶
The non-linearities allow applying a different transform to each region of the input space. The residual connections mitigate the vanishing gradient problem. Now, we add an extra layer of stability by normalizing the data in each layer. Normalization helps keep all representations within reasonable values, which improves numerical stability and has been linked to faster convergence in neural networks.
Normalization works as an extra block, which is usually inserted after adding the residual connection:
Using normalization after each layer, we observe a faster convergence.
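As a standalone illustration of what the normalization block does (this snippet is not part of the model below): nn.BatchNorm1d standardizes each feature across the batch and then applies a learnable affine transformation, so intermediate representations keep roughly zero mean and unit variance:
import torch
import torch.nn as nn
# Standalone illustration of BatchNorm1d on a batch with large scale and offset
bn = nn.BatchNorm1d(2)
h = torch.randn(100, 2) * 7.0 + 3.0           # mean ~3, std ~7 per feature
h_norm = bn(h).detach()
print(h.mean(dim=0), h.std(dim=0))            # roughly [3, 3] and [7, 7]
print(h_norm.mean(dim=0), h_norm.std(dim=0))  # roughly [0, 0] and [1, 1]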
class ResidualBlock(nn.Module):
def __init__(self, hidden_size):
super().__init__()
self.fc1 = nn.Linear(hidden_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.nonlinearity = nn.ReLU()
# Initialize weights to identity and biases to zero
nn.init.eye_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
nn.init.eye_(self.fc2.weight)
nn.init.zeros_(self.fc2.bias)
def forward(self, x):
residual = x
x = self.nonlinearity(self.fc1(x))
x = self.fc2(x)
x += residual # Add the residual connection
return x
class MLP(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(MLP, self).__init__()
self.bn1 = nn.BatchNorm1d(hidden_size)
self.bn2 = nn.BatchNorm1d(hidden_size)
self.bn3 = nn.BatchNorm1d(hidden_size)
self.fc1 = nn.Linear(input_size, hidden_size)
self.r1 = ResidualBlock(hidden_size)
self.r2 = ResidualBlock(hidden_size)
self.fc2 = nn.Linear(hidden_size, output_size)
# Initialize weights to identity and biases to zero
nn.init.eye_(self.fc1.weight)
nn.init.zeros_(self.fc1.bias)
nn.init.eye_(self.fc2.weight)
nn.init.zeros_(self.fc2.bias)
def forward(self, x):
x = self.fc1(x)
#x = self.bn1(x)
x = self.r1(x)
x = self.bn2(x)
x = self.r2(x)
x = self.bn3(x)
y = self.fc2(x)
return y
# Create an instance of the MLP
hidden_size = 2 # Example hidden layer size
mlp_model = MLP(input_size, hidden_size, output_size)
model, outputs = train_model(
model=mlp_model,
X=X,
y=y,
lr=0.01,
epochs=5000
)
fig = animate_training(outputs, y, frame_duration=1, title='Linear by parts data, MLP model with residual propagation<br>and batch normalization')
fig.show()
100%|██████████| 5000/5000 [00:09<00:00, 529.21it/s]
Conclusion¶
Although our reference states that there is a theoretical upper bound on the number of regions created by successive ReLU-separated projections, it is still challenging to find the optimal regions and corresponding projections for a particular dataset.
Our toolset for this is:
- We can use simple linear regressions or logistic regressions to find a baseline for our system.
- We can use the MLP topology to split the input space into regions. More layers, and more neurons per layer, increase the expressivity of the network, that is, the number of linear regions it can model.
- Adding residual connections helps propagate gradients to the earlier layers of the MLP, which favors using the full potential of the network.
- Normalization layers lead to a more numerically stable fit and faster convergence.
Practice¶
Make a neural network that maps $X$ to $y$ using the data below. Plot an animation of the convergence process, like the ones shown above. Try the different model variations - what happens in each case?
x1 = torch.linspace(-1, 1, 500)
x2 = torch.linspace(1, -1, 500)
X = torch.stack([x1, x2], dim=1)
X += 0.1 * torch.randn(500, 2) # Adding noise to X
y = 2*X + X**2 +0.5
import matplotlib.pyplot as plt
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0].numpy(), X[:, 1].numpy(), label='X', alpha=0.5)
plt.scatter(y[:, 0].numpy(), y[:, 1].numpy(), label='y', alpha=0.5)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Scatterplot of X and y')
plt.legend()
plt.show()