# Section 6: pack three token groups of different lengths into one sequence,
# run a shared per-token MLP over it, then unpack back into the same groups.
part("6) pack unpack")
B, Cemb = 2, 128

# `gadget` is presumably the torch device chosen earlier in the file (confirm
# against the header). torch factory functions take it via the `device=` keyword.
class_token = torch.randn(B, 1, Cemb, device=gadget)
image_tokens = torch.randn(B, 196, Cemb, device=gadget)
text_tokens = torch.randn(B, 32, Cemb, device=gadget)
show_shape("class_token", class_token)
show_shape("image_tokens", image_tokens)
show_shape("text_tokens", text_tokens)

# "b * c": batch and channel axes must match across inputs; the starred axis is
# concatenated. `ps` keeps what unpack needs to split the result again.
packed, ps = pack([class_token, image_tokens, text_tokens], "b * c")
show_shape("packed", packed)
print("packed_shapes (ps):", ps)

# The MLP only acts on the last (channel) axis, so the token axis is untouched
# and the packed layout survives the forward pass.
mixer = nn.Sequential(
    nn.LayerNorm(Cemb),
    nn.Linear(Cemb, 4 * Cemb),
    nn.GELU(),
    nn.Linear(4 * Cemb, Cemb),
).to(gadget)
blended = mixer(packed)
show_shape("blended", blended)

class_out, image_out, text_out = unpack(blended, ps, "b * c")
show_shape("class_out", class_out)
show_shape("image_out", image_out)
show_shape("text_out", text_out)

# Round-trip check: each unpacked group has the shape of its original input.
assert class_out.shape == class_token.shape
assert image_out.shape == image_tokens.shape
assert text_out.shape == text_tokens.shape
part("7) layers")


class PatchEmbed(nn.Module):
    """Cut an image batch into non-overlapping square patches and embed them.

    Args:
        in_channels: Number of channels in the input images.
        emb_dim: Size of the embedding produced for each patch.
        patch: Side length of a patch in pixels. The input height and width
            must both be divisible by it for the rearrange pattern to apply.
    """

    def __init__(self, in_channels: int = 3, emb_dim: int = 192, patch: int = 8):
        super().__init__()
        self.patch = patch
        # (b, c, H, W) -> (b, num_patches, patch * patch * c): every patch is
        # flattened into a single vector.
        self.to_patches = Rearrange(
            "b c (h p1) (w p2) -> b (h w) (p1 p2 c)", p1=patch, p2=patch
        )
        self.proj = nn.Linear(in_channels * patch * patch, emb_dim)

    # Must be named `forward`: nn.Module.__call__ dispatches to it.
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return patch embeddings of shape (batch, num_patches, emb_dim)."""
        x = self.to_patches(x)
        return self.proj(x)
class SimpleVisionHead(nn.Module):
    """Mean-pool a token sequence and classify the pooled vector.

    Args:
        emb_dim: Channel size of the incoming tokens.
        num_classes: Number of output logits.
    """

    def __init__(self, emb_dim: int = 192, num_classes: int = 10):
        # Imported locally so the class does not depend on the file header
        # (not visible from this section) exposing `Reduce`.
        from einops.layers.torch import Reduce

        super().__init__()
        # Average over the token axis: (b, t, c) -> (b, c).
        self.pool = Reduce("b t c -> b c", reduction="mean")
        self.classifier = nn.Linear(emb_dim, num_classes)

    # Must be named `forward`: nn.Module.__call__ dispatches to it.
    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes)."""
        x = self.pool(tokens)
        return self.classifier(x)
# Wire the two layers together on a small random image batch.
patch_embed = PatchEmbed(in_channels=3, emb_dim=192, patch=8).to(gadget)
head = SimpleVisionHead(emb_dim=192, num_classes=10).to(gadget)

# `gadget` is presumably the torch device chosen earlier in the file; torch.randn
# takes it via the `device=` keyword.
imgs = torch.randn(4, 3, 32, 32, device=gadget)

# 32x32 images with 8x8 patches give a 4x4 grid, i.e. 16 tokens per image.
tokens = patch_embed(imgs)
logits = head(tokens)
show_shape("tokens", tokens)
show_shape("logits", logits)
# Section 8: a hand-written group normalisation and a flatten/unflatten
# round-trip, both expressed with einops patterns.
# Imported locally so this section does not depend on the file header
# (not visible from here) exposing `reduce`.
from einops import reduce

part("8) sensible")

# `gadget` is presumably the torch device chosen earlier in the file; torch.randn
# takes it via the `device=` keyword.
x = torch.randn(2, 32, 16, 16, device=gadget)
g = 8

# Fold the group index into the batch axis so each row of `xg` is one group.
xg = rearrange(x, "b (g cg) h w -> (b g) cg h w", g=g)
show_shape("x", x)
show_shape("xg", xg)

# Per-group statistics over (cg, h, w); the size-1 axes keep them broadcastable.
mean = reduce(xg, "bg cg h w -> bg 1 1 1", "mean")
var = reduce((xg - mean) ** 2, "bg cg h w -> bg 1 1 1", "mean")
xg_norm = (xg - mean) / torch.sqrt(var + 1e-5)

# Undo the grouping; the batch size comes from `x` instead of being hard-coded.
x_norm = rearrange(xg_norm, "(b g) cg h w -> b (g cg) h w", b=x.shape[0], g=g)
show_shape("x_norm", x_norm)

# Flattening the spatial axes and splitting them again must reproduce `z`.
z = torch.randn(3, 64, 20, 30, device=gadget)
z_flat = rearrange(z, "b c h w -> b c (h w)")
z_unflat = rearrange(z_flat, "b c (h w) -> b c h w", h=20, w=30)
assert (z - z_unflat).abs().max().item() < 1e-6
show_shape("z_flat", z_flat)
# Section 9: inspect the memory layout of a tensor after a pure axis permutation.
part("9) views")

# `gadget` is presumably the torch device chosen earlier in the file; torch.randn
# takes it via the `device=` keyword.
a = torch.randn(2, 3, 4, 5, device=gadget)
b = rearrange(a, "b c h w -> b h w c")

# Report contiguity of both tensors and whether `b` is backed by `a`.
print("a.is_contiguous():", a.is_contiguous())
print("b.is_contiguous():", b.is_contiguous())
print("b._base is a:", getattr(b, "_base", None) is a)

# Closing message, with the garbled wording repaired ("vision, attention").
part("Done ✅ You now have reusable einops patterns for vision, attention, and multimodal token packing")
Elevate your perspective with NextTech News, where innovation meets insight.
Discover the latest breakthroughs, get exclusive updates, and connect with a global network of future-focused thinkers.
Unlock tomorrow’s trends today: read more, subscribe to our newsletter, and become part of the NextTech community at NextTech-news.com

