10 PyTorch Mini Training Loop

This chapter demonstrates a minimal PyTorch workflow: creating a synthetic dataset, wrapping it in a DataLoader, defining a small GRU-based model, and running a short training loop. The example captures the core ideas introduced in Session 19: tensors, datasets, batching, models, optimizers, and loss computation.

10.1 Synthetic Data + GRU Model

import torch
import torch.nn as nn
import numpy as np
# Reproducibility
rng = np.random.default_rng(0)
# Tiny synthetic dataset dimensions
T, F, N = 16, 3, 512 # time steps, features, samples
# Generate random sequences (N, T, F)
X = rng.normal(0, 0.5, size=(N, T, F)).astype("float32")
# Simple linear rule for next-step "return"
w = rng.normal(0, 0.2, size=(F,)).astype("float32")
y = (X[:, -1, :] @ w + rng.normal(0, 0.1, size=(N,))).astype("float32")
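# Added sanity check (not in the original listing): confirm the array shapes
assert X.shape == (N, T, F) and y.shape == (N,)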
# Wrap into PyTorch Dataset + DataLoader
ds = torch.utils.data.TensorDataset(
    torch.from_numpy(X),
    torch.from_numpy(y),
)
dl = torch.utils.data.DataLoader(ds, batch_size=64, shuffle=True)
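# Added sanity check: each batch should be (batch, T, F) inputs, (batch,) targets
xb0, yb0 = next(iter(dl))
assert tuple(xb0.shape) == (64, T, F) and tuple(yb0.shape) == (64,)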
# GRU-based regressor
class SmallGRU(nn.Module):
    def __init__(self, F, H=32):
        super().__init__()
        self.gru = nn.GRU(F, H, batch_first=True)
        self.head = nn.Linear(H, 1)

    def forward(self, x):
        _, h = self.gru(x)
        return self.head(h[-1]).squeeze(-1)
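# Shape walkthrough (added comments): for x of shape (batch, T, F), nn.GRU
# returns (output, h) with output (batch, T, H) and h (num_layers, batch, H);
# h[-1] is the final hidden state of the last layer, so head(h[-1]) has shape
# (batch, 1) and squeeze(-1) yields (batch,).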
# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = SmallGRU(F).to(device)
opt = torch.optim.AdamW(net.parameters(), lr=3e-3)
loss_fn = nn.L1Loss()
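# Note (added): nn.L1Loss is the mean absolute error, hence "train_mae" below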
# Training loop
loss_history = []
for ep in range(1, 11):
    net.train()
    tot = 0
    for xb, yb in dl:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad(set_to_none=True)
        yhat = net(xb)
        loss = loss_fn(yhat, yb)
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), 1.0)
        opt.step()
        tot += loss.item() * xb.size(0)
    epoch_mae = tot / len(ds)
    loss_history.append(epoch_mae)
    print(f"epoch {ep:02d} | train_mae={epoch_mae:.4f}")
loss_history

10.2 Explanation
The code above illustrates several key pieces of the PyTorch training workflow:
- Synthetic dataset creation shows how PyTorch works even without real data.
- TensorDataset and DataLoader handle batching, shuffling, and iteration.
- The GRU model processes each sequence and returns an encoded representation, from which a linear head predicts a single value.
- The optimizer (AdamW) updates weights based on gradients.
- L1Loss measures prediction error as the mean absolute error (MAE).
- The training loop performs forward pass → loss → backward pass → gradient step; a standalone sketch of one such step follows this list.
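To make that cycle concrete in isolation, here is a minimal sketch of a single training step. It reuses the net, opt, loss_fn, device, T, and F objects defined above; the random batch is purely illustrative.

# A single training step in isolation (sketch; reuses net, opt, loss_fn,
# device, T, F from the listing above; the random batch is illustrative only)
xb = torch.randn(8, T, F, device=device)  # dummy batch of 8 sequences
yb = torch.randn(8, device=device)        # dummy targets
opt.zero_grad(set_to_none=True)           # clear stale gradients
loss = loss_fn(net(xb), yb)               # forward pass + loss
loss.backward()                           # backward pass: compute gradients
opt.step()                                # gradient step: update weights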
This minimal model is not meant for high performance; it simply illustrates how modern deep learning frameworks structure data pipelines and training logic.