# Tensors in PyTorch

In [None]:
import numpy as np
import torch
from PIL import Image

## Tensors with all elements being zero or one

In [None]:
# Build a rank-1 tensor (a vector) holding ten zeros, then show its
# Python type, its contents, its shape, and its number of dimensions.
a = torch.zeros((10,))
print(
    f"{type(a)=}",
    f"{a=}",
    f"{a.shape=}",
    f"{a.ndim=}",
    sep="\n",
)

In [None]:
# Build a rank-2 tensor (a matrix) of ones with 3 rows and 4 columns, then
# show its Python type, its contents, its shape, and its number of dimensions.
b = torch.ones(3, 4)
print(
    f"{type(b)=}",
    f"{b=}",
    f"{b.shape=}",
    f"{b.ndim=}",
    sep="\n",
)

## Representing a color image with a 3-D tensor

In [None]:
# We first use PIL.Image to read an image and visualize it.
# "cat.jpg" is a relative path, so the file is looked up relative to the
# current working directory of the notebook.
img_pil = Image.open("cat.jpg")
# A bare expression on the last line of a cell is rendered by the notebook,
# which is what displays the picture here.
img_pil

In [None]:
# Turn the PIL image into a tensor. For a color (RGB) image the result has
# shape (H, W, C), and every element (pixel value) is a uint8 in 0-255.
# PyTorch cannot ingest a PIL image directly, so the data goes through a
# numpy array on the way.
img_array = torch.as_tensor(np.array(img_pil))
print(
    f"{img_array=}",
    f"{img_array.shape=}",
    f"{img_array.dtype=}",
    sep="\n",
)

## Manipulating tensors

In PyTorch, element-wise operations are written as follows:

In [None]:
# Element-wise arithmetic: each operator combines the entries of `a` and `b`
# position by position and returns a tensor of the same shape.
a = torch.tensor([0, 1, 2])
b = torch.tensor([4, 5, 6])
print(f"{a=}")
print(f"{b=}")
print(f"elementwise addition: {a + b=}")  # equivalent to `torch.add(a, b)`
print(f"elementwise subtraction: {a - b=}")  # equivalent to `torch.sub(a, b)`
print(f"elementwise multiplication: {a * b=}")  # equivalent to `torch.mul(a, b)`
# `/` is true division, so the result is floating point even for integer inputs.
print(f"elementwise division: {a / b=}")  # equivalent to `torch.div(a, b)`

For element-wise operations, make sure both tensors have the same shape
(or shapes that are compatible under *broadcasting*, which is covered later in this notebook).
Otherwise the operation will fail and raise an error.

In [None]:
# Draw two random vectors of lengths 5 and 6, with entries sampled from a
# uniform distribution on the interval $[0, 1)$.
# Their lengths differ, so the element-wise addition raises a RuntimeError;
# it is caught here so that only the error message is printed.
a, b = torch.rand(5), torch.rand(6)
try:
    print(f"{a + b=}")
except RuntimeError as err:
    print(err)

The cool thing about tensors is that we can run a large number of operations at once.

In [None]:
# One vectorized call adds a million pairs of numbers.
x, y = torch.rand(10**6), torch.rand(10**6)
print(f"{x + y=}")

In [None]:
# # Transpose

## View vs Reshape

Both operations manipulate the tensor and return a tensor with the same data in a specified shape.



In [None]:
# Both calls rearrange the same six random values into a 2x3 matrix.
a = torch.rand((6,))
print(
    f"{a=}",
    f"{a.view(2, 3)=}",
    f"{a.reshape(2, 3)=}",
    sep="\n",
)

These two operations can yield similar outcomes but work differently.\
`Tensor.view` merely creates a view of the original tensor and shares the underlying data
with the original tensor.
To guarantee this data sharing, `Tensor.view` can only operate on *contiguous* tensors,
meaning that the tensor to be viewed must be stored in contiguous memory.\
In contrast, `torch.reshape` does not have this constraint.
Whenever possible, the returned tensor will be a view of the input.
Otherwise, it will be a copy.


A simple non-contiguous case may arise from a transpose operation.



In [None]:
# `b` is the transpose of `a`; it shares storage with `a` but is no longer
# laid out contiguously in memory.
a = torch.rand((2, 3))
b = a.t()
print(f"{a=}")
# Flattening `b` with `view` raises a RuntimeError; only its message is shown.
try:
    print(f"{b.view(6)=}")
except RuntimeError as err:
    print(err)
# Two ways that do work: make a contiguous copy first, or let `reshape`
# handle the layout.
print(
    f"{b.contiguous().view(6)=}",
    f"{b.reshape(6)=}",
    sep="\n",
)

## Permute

`torch.permute` returns a view of the original tensor input with its dimensions permuted to have a desired ordering.
It can be thought of as a generalized transpose operation for N-D tensors.

Please beware that `torch.permute` is *NOT* the same as `torch.view`
even though they return tensors with same shapes sometimes.


In [None]:
# Both results have shape (3, 2), but the element order differs: `view`
# re-chunks the row-major data, while `permute` swaps the two axes.
a = torch.rand((2, 3))
print(
    f"{a.view(3, 2)=}",
    f"{a.permute(1, 0)=}",
    sep="\n",
)

## Squeeze

`torch.squeeze` returns a tensor with all specified dimensions of input of size 1 removed.
Please note that you should NEVER call `torch.squeeze` without an argument specifying the dimensions to be removed.


In [None]:
# Drop size-1 dimensions: first only dim 1, then dims 1 and 2 together, and
# finally every size-1 dim at once via the argument-free form.
a = torch.rand((3, 1, 1, 2))
print(
    f"{a.squeeze(1).shape=}",
    f"{a.squeeze((1, 2)).shape=}",
    f"{a.squeeze().shape=}",  # Illustration purpose only! You should NEVER use this.
    sep="\n",
)

## Indexing

The contents of a tensor can be accessed and modified using Python's indexing and slicing notation:


In [None]:
# Index a random 2x3 matrix: one element, one row, one column, and finally
# a selection of columns.
a = torch.rand((2, 3))
print(
    f"{a=}",
    f"{a[1, 0]=}",
    f"{a[1, :]=}",
    f"{a[:, 2]=}",
    sep="\n",
)
# Columns 0 and 2; equivalent to torch.index_select(a, 1, torch.tensor([0, 2]))
print(a[:, [0, 2]])

## Broadcasting

## Broadcasting - Outer-product-like addition

Let's start from an example of outer product.
It can be implemented with a for-loop or an in-built function `torch.outer`.


In [None]:
# Outer product of two integer ranges, filled in entry by entry with a
# nested loop and then compared against the built-in `torch.outer`.
a, b = torch.arange(6), torch.arange(5)
c = torch.zeros(6, 5, dtype=a.dtype)
for i in range(c.shape[0]):
    for j in range(c.shape[1]):
        c[i, j] = a[i] * b[j]
print(
    f"{c=}",
    f"{torch.outer(a, b)=}",
    sep="\n",
)

As you can see above, the outer product multiplies each pair of numbers from the two input tensors.

Suppose we want to add (instead of multiply) each pair of elements.
There is no dedicated operation in PyTorch that does this directly.
A slightly painful way to implement this is to use a for loop.


In [None]:
# Same nested-loop pattern as the outer product, but each entry holds the
# sum a[i] + b[j] instead of the product.
a, b = torch.arange(6), torch.arange(5)
c = torch.zeros(6, 5, dtype=a.dtype)

for i in range(c.shape[0]):
    for j in range(c.shape[1]):
        c[i, j] = a[i] + b[j]

# Bare expression: the notebook displays the resulting table.
c

This, however, is not very efficient and is a lot of code.

PyTorch allows you to do this more efficiently using *broadcasting*.


In [None]:
# `a[:, None]` is a (6, 1) column and `b[None, :]` a (1, 5) row; adding them
# broadcasts to (6, 5), so every pair of elements is summed without a loop.
a, b = torch.arange(6), torch.arange(5)
print(a[:, None] + b[None, :])

Let's unpack our example of outer-product-like addition.
First, we can add a new dimension of shape 1 to a tensor by passing a `None` index like below.


In [None]:
# Every `None` in the index inserts a new axis of size 1 at that position.
a = torch.arange(10)
print(
    f"{a.shape=}",
    f"{a[None].shape=}",
    f"{a[:, None].shape=}",
    f"{a[:, None, None].shape=}",
    f"{a[None, :, None].shape=}",
    sep="\n",
)

After that, a dimension of size 1 can be expanded (or *broadcast*) to match the other tensor.
In this example of `a[:, None] + b[None, :]`, `a[:, None]` and `b[None, :]` have shapes (6, 1)
and (1, 5) respectively, so they are *broadcastable* and the resulting tensor has shape (6, 5).


## Finding maximum distance between points


Let's look at another example.
Assume that we randomly generate a set of 100 2-D points from a 2-D space following
standard normal distribution (`x=torch.randn(100, 2)`).
We want to find the maximum distance between points.

A naive way is to compute the pairwise distance using a nested loop and compute the maximum thereon.
With broadcasting, we can do it in one line.


In [None]:
# 100 random 2-D points and a table for their pairwise Euclidean distances.
x = torch.randn((100, 2))
d = torch.zeros((100, 100))

# Loop version: distances are symmetric, so filling the upper triangle
# (j >= i) is enough to find the maximum.
for i in range(d.shape[0]):
    for j in range(i, d.shape[1]):
        d[i, j] = ((x[i] - x[j]) ** 2).sum().sqrt()
print(f"{torch.max(d)=}")

# Broadcast version: (100, 1, 2) - (1, 100, 2) yields every pairwise
# difference at once; the notebook displays the resulting maximum.
((x[:, None, :] - x[None, :, :]) ** 2).sum(2).sqrt().max()

## Matrix multiplication in batches


Let's see another example.
Assume we have a 3-d tensor `a` and a 2-d matrix `b`.
For each slice of `a` (`a[i, ...]`), we want to multiply it with matrix `b`.
A naive way is to do the for loop but it's super slow when the number of slices becomes large.
With broadcasting, the operation is written in one line and executes faster.


In [None]:
# `a` is a stack of 100 matrices of shape (50, 200); `b` is one (200, 100) matrix.
a = torch.randn(100, 50, 200)
b = torch.randn(200, 100)

# Output buffer for the loop version: one (50, 100) product per slice of `a`.
c = torch.empty(100, 50, 100)

# Loop version: one matrix product per slice, written into `c`.
%timeit for i in range(100): c[i, ...] = a[i, ...] @ b
# Broadcast version: `b` is applied to every slice of `a` in a single call.
%timeit a @ b

The gain is more significant if we move the tensor to the GPU.



In [None]:
if torch.cuda.is_available():
    # note this will fail if you don't have a GPU
    a = a.cuda()
    b = b.cuda()
    c = c.cuda()
    %timeit for i in range(100): c[i, ...] = a[i, ...] @ b
    %timeit a @ b

-

## Matrix multiplication

In [None]:
# A (2, 4) matrix times a (4, 3) matrix gives a (2, 3) matrix.
a, b = torch.rand(2, 4), torch.rand(4, 3)
print(f"{a @ b=}")  # equivalent to `torch.matmul(a, b)`

Beware of the dimensions of matrices. The number of columns of tensor a must be equal to
the number of rows of tensor b. The example below does not work.



In [None]:
# `a` has 5 columns but `b` has only 3 rows, so the product is undefined.
# The resulting RuntimeError is caught and only its message is printed.
a, b = torch.rand(5, 5), torch.rand(3, 5)
try:
    print(f"{a @ b=}")
except RuntimeError as err:
    print(err)

## Vector multiplication

Let's first look at vector and matrix multiplication.

In [None]:
# A 3x3 matrix and a 3x1 column vector; the column vector goes on the right
# of the matrix in a product.
M, v = torch.rand(3, 3), torch.rand(3, 1)

print(
    f"matrix: {M=}",
    f"column vector: {v=}",
    sep="\n",
)

print(f"column vector can be multiplied on the right {M @ v=}")

In [None]:
# A 1x3 row vector goes on the left of the 3x3 matrix `M` (defined in an
# earlier cell) in a product.
w = torch.rand((1, 3))

print(
    f"row vector: {w=}",
    f"row vector can be multiplied on the left {w @ M=}",
    sep="\n",
)

Let's see vector-vector multiplication next:

In [None]:
print("Define two column vectors")

# Two independent random column vectors of shape (3, 1).
a, b = torch.rand(3, 1), torch.rand(3, 1)

print(
    f"{a=}",
    f"{b=}",
    sep="\n",
)

In [None]:
# Inner product: a row vector (1, 3) times a column vector (3, 1) gives a
# 1x1 matrix. `torch.dot` only accepts 1-D tensors, so the matching call is
# `torch.dot(a.flatten(), b.flatten())`, which returns the same value as a scalar.
print(
    "Inner product",
    f"{a.T @ b=}",
    sep="\n",
)

In [None]:
# Outer product: a column vector (3, 1) times a row vector (1, 3) gives a
# 3x3 matrix, the same values as `torch.outer(a.flatten(), b.flatten())`.
print(
    "Outer product",
    f"{a @ b.T=}",
    sep="\n",
)

## Euclidean norm

In [None]:
# Euclidean (L2) norm of a random column vector, computed first from the
# definition and then with the library routine.
v = torch.rand(3, 1)


print(f"{v=}")

print("we can compute the norm of a vector with definition")
print(f"{(v ** 2).sum().sqrt()=}")

# `torch.norm` is deprecated; `torch.linalg.vector_norm` is its documented
# replacement for vector norms and defaults to the 2-norm.
print("or with torch.linalg.vector_norm")

print(f"{torch.linalg.vector_norm(v)=}")
print("note: torch.norm is deprecated in favor of torch.linalg.vector_norm")

## Frobenius norm

In [None]:
# Frobenius norm of a random 3x3 matrix, computed first from the definition
# and then with the library routine.
W = torch.rand(3, 3)

print(f"{W=}")

print("we can compute the norm of a matrix with definition")

print(f"{torch.sqrt(torch.sum(W ** 2))=}")

# `torch.norm` is deprecated; `torch.linalg.matrix_norm` is its documented
# replacement for matrix norms and defaults to the Frobenius norm.
print("or with torch.linalg.matrix_norm")

print(f"{torch.linalg.matrix_norm(W)=}")
print("note: torch.norm is deprecated in favor of torch.linalg.matrix_norm")