
Commit

improved documentation
Eduardo Leao authored and Eduardo Leao committed Dec 29, 2023
1 parent 641780c commit 9b5c73a
Showing 1 changed file with 54 additions and 31 deletions.
85 changes: 54 additions & 31 deletions neuralforge/tensor_operations.py
@@ -226,9 +226,11 @@ def forward(self, a, b):
def backward(self, dz, z):
a, b = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
da = dz

# Rescale gradient to have the same shape as "a":
grad_dim = len(dz.shape)
in_dim = len(a.shape)
for _ in range(grad_dim - in_dim):
@@ -239,9 +241,11 @@ def backward(self, dz, z):
da = da.sum(axis=n, keepdims=True)
a.backward(da, z)

# Find gradients relative to "b", and make recursive calls if it requires gradients:
# Find gradients relative to "b", and pass it downstream:
if b.requires_grad:
db = dz

# Rescale gradient to have the same shape as "b":
grad_dim = len(dz.shape)
in_dim = len(b.shape)
for _ in range(grad_dim - in_dim):
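
A minimal plain-NumPy sketch (illustrative only, not the library's API; all names below are made up) of why the gradient of a broadcast addition is summed back over the broadcast axes: every broadcast copy of an element of "a" receives the upstream gradient, so those contributions accumulate.

import numpy as np

a = np.random.randn(3, 1)            # broadcasts along the last axis
b = np.random.randn(3, 4)
dz = np.ones((3, 4))                 # upstream gradient of z = a + b

da = dz
for n, dim in enumerate(a.shape):    # collapse the axes that "a" was broadcast over
    if dim == 1:
        da = da.sum(axis=n, keepdims=True)

assert da.shape == a.shape           # (3, 1); each entry sums 4 copies of dz
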
@@ -276,7 +280,7 @@ def forward(self, a):
def backward(self, dz, z):
a = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
da = -dz
a.backward(da, z)
@@ -305,9 +309,12 @@ def forward(self, a, b):
def backward(self, dz, z):
a, b = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# d/da(a*b) = b, apply chain rule:
da = dz * b._data

# Rescale gradient to have the same shape as "a":
grad_dim = len(dz.shape)
in_dim = len(a.shape)
for _ in range(grad_dim - in_dim):
@@ -318,9 +325,12 @@ def backward(self, dz, z):
da = da.sum(axis=n, keepdims=True)
a.backward(da, z)

# Find gradients relative to "b", and make recursive calls if it requires gradients:
# Find gradients relative to "b", and pass it downstream:
if b.requires_grad:
# d/db(a*b) = a, apply chain rule:
db = dz * a._data

# Rescale gradient to have the same shape as "b":
grad_dim = len(dz.shape)
in_dim = len(b.shape)
for _ in range(grad_dim - in_dim):
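
For reference, a quick finite-difference check (plain Python, illustrative values) of the rule d/da(a*b) = b used above:

a, b, eps = 2.0, 5.0, 1e-6
dz = 1.0                                                # upstream gradient
analytic = dz * b                                       # chain rule with d/da(a*b) = b
numeric = ((a + eps) * b - (a - eps) * b) / (2 * eps)   # central difference
assert abs(analytic - numeric) < 1e-6
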
@@ -355,9 +365,12 @@ def forward(self, a, b):
def backward(self, dz, z):
a, b = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
da = dz / b._data
# d/da(a/b) = (1/b), apply chain rule:
da = dz * (1 / b._data)

# Rescale gradient to have the same shape as "a":
grad_dim = len(dz.shape)
in_dim = len(a.shape)
for _ in range(grad_dim - in_dim):
@@ -368,9 +381,12 @@ def backward(self, dz, z):
da = da.sum(axis=n, keepdims=True)
a.backward(da, z)

# Find gradients relative to "b", and make recursive calls if it requires gradients:
# Find gradients relative to "b", and pass it downstream:
if b.requires_grad:
# d/db(a/b) = -(a/b^2), apply chain rule:
db = - dz * a._data / (b._data ** 2)

# Rescale gradient to have the same shape as "b":
grad_dim = len(dz.shape)
in_dim = len(b.shape)
for _ in range(grad_dim - in_dim):
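
Likewise, a small finite-difference check (plain Python, illustrative values) of the rule d/db(a/b) = -(a/b^2) used above:

a, b, eps = 3.0, 2.0, 1e-6
dz = 1.0                                               # upstream gradient
analytic = -dz * a / (b ** 2)                          # chain rule with d/db(a/b) = -a/b^2
numeric = (a / (b + eps) - a / (b - eps)) / (2 * eps)  # central difference
assert abs(analytic - numeric) < 1e-5
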
@@ -405,9 +421,9 @@ def forward(self, a, b):
def backward(self, dz, z):
a, b = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Transpose the last 2 dimentions:
# Backprop through the matmul:
da = dz @ b._data.swapaxes(-1,-2)

# Get difference between "a" size and upstream "da" size, to broadcast grad into "a":
@@ -419,8 +435,9 @@ def backward(self, dz, z):

a.backward(da, z)

# Find gradients relative to "b", and make recursive calls if it requires gradients:
# Find gradients relative to "b", and pass it downstream:
if b.requires_grad:
# Backprop through the matmul:
db = a._data.swapaxes(-1,-2) @ dz

# Get difference between "b" size and upstream "db" size, to broadcast grad into "b":
@@ -459,17 +476,17 @@ def forward(self, a, dim, keepdims=False):
def backward(self, dz, z):
a, data, dim = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
da = dz
if a.shape != da.shape:
# Broadcast upstream derivative to the size of "a":
da = np.expand_dims(da, axis=dim)
da = da * np.ones_like(a._data)
dz = np.expand_dims(dz, axis=dim)
dz = dz * np.ones_like(a._data)
# Broadcast upstream output (max) to the size of "a":
max = np.expand_dims(data, axis=dim)
max = max * np.ones_like(a._data)
da = da * np.equal(a._data, max)
# Add upstream gradients to the [max] values:
da = dz * np.equal(a._data, max)
a.backward(da, z)
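
A small standalone example (plain NumPy, not the library's API) of the masking idea above: the upstream gradient is routed only to the positions that attained the maximum.

import numpy as np

a = np.array([[1., 5., 3.],
              [4., 2., 2.]])
dz = np.array([10., 20.])                            # upstream gradient of a.max(axis=1)

mask = np.equal(a, a.max(axis=1, keepdims=True))     # 1 only where the max was attained
da = np.expand_dims(dz, axis=1) * mask
# da == [[ 0., 10.,  0.],
#        [20.,  0.,  0.]]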

class Sum:
@@ -495,8 +512,9 @@ def forward(self, a, dim, keepdims):
def backward(self, dz, z):
a = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Expand upstream gradients to the shape of "a":
da = np.ones(a.shape) * dz
a.backward(da, z)

@@ -523,8 +541,9 @@ def forward(self, a, dim, keepdims):
def backward(self, dz, z):
a, dim = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Propagate through the mean(x) operation:
da = np.ones(a.shape) * dz
da /= np.prod(np.array(a.shape)[dim])
a.backward(da, z)
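
A minimal sketch (plain NumPy, illustrative shapes) of the mean backward rule: each input element contributes with weight 1/N, so the upstream gradient is spread evenly and divided by the size of the reduced dimension.

import numpy as np

a = np.random.randn(4, 5)
dz = np.ones((4, 1))                      # upstream gradient of a.mean(axis=1, keepdims=True)

N = a.shape[1]
da = np.ones(a.shape) * dz / N            # every element gets dz / N
assert np.allclose(da, 1.0 / 5.0)
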
@@ -552,8 +571,9 @@ def forward(self, a, dim, keepdims):
def backward(self, dz, z):
a, dim = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Propagate through the var(x) operation:
da = np.ones(a.shape) * dz
da = da * 2 * (a._data - a._data.mean(axis=dim, keepdims=True)) / np.prod(np.array(a.shape)[dim])
a.backward(da, z)
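
The variance rule used above, d/dx_i var(x) = 2 * (x_i - mean(x)) / N, can be checked with a finite difference on one coordinate (plain NumPy, illustrative values):

import numpy as np

x, eps, i = np.random.randn(6), 1e-6, 2
analytic = 2 * (x - x.mean()) / x.size       # gradient of var(x) w.r.t. each element

xp, xm = x.copy(), x.copy()
xp[i] += eps
xm[i] -= eps
numeric = (xp.var() - xm.var()) / (2 * eps)  # central difference in coordinate i
assert abs(analytic[i] - numeric) < 1e-5
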
@@ -582,8 +602,9 @@ def forward(self, a):
def backward(self, dz, z):
a, data = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# d/da(e^a) = e^a, apply the chain rule to the derivative of e^a:
da = data * dz
a.backward(da, z)

@@ -610,8 +631,9 @@ def forward(self, a):
def backward(self, dz, z):
a = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# d/da(ln(a)) = (1/a), apply the chain rule to the derivative of the natural log:
da = (1 / a._data) * dz
a.backward(da, z)

@@ -638,8 +660,9 @@ def forward(self, a):
def backward(self, dz, z):
a, data = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# d/da(sqrt(a)) = (1/2) * (1/sqrt(a)), apply the chain rule to the derivative of the square root:
da = (1 / 2) * (1 / data) * dz
a.backward(da, z)
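
The three elementwise rules above (d/da e^a = e^a, d/da ln(a) = 1/a, d/da sqrt(a) = 1/(2*sqrt(a))) can be verified together with a central difference (plain NumPy, illustrative values):

import numpy as np

x, eps = 1.7, 1e-6
for f, dfdx in [(np.exp,  np.exp(x)),
                (np.log,  1.0 / x),
                (np.sqrt, 0.5 / np.sqrt(x))]:
    numeric = (f(x + eps) - f(x - eps)) / (2 * eps)
    assert abs(dfdx - numeric) < 1e-5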

@@ -667,9 +690,9 @@ def forward(self, a, shape):
def backward(self, dz, z):
a = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:

# Reshape upstream gradients:
da = dz.reshape(a.shape)

a.backward(da, z)
@@ -697,9 +720,9 @@ def forward(self, a, *dims):
def backward(self, dz, z):
a, dims = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:

# Transpose upstream gradients:
da = dz.swapaxes(*dims)

a.backward(da, z)
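
For shape-changing ops like the two above, the backward pass just applies the inverse shape change to the upstream gradient; a quick sanity check (plain NumPy, illustrative shapes):

import numpy as np

a = np.random.randn(2, 3, 4)

dz_t = np.ones((2, 4, 3))                 # upstream gradient of a.swapaxes(-1, -2)
assert dz_t.swapaxes(-1, -2).shape == a.shape

dz_r = np.ones((6, 4))                    # upstream gradient of a.reshape(6, 4)
assert dz_r.reshape(a.shape).shape == a.shape
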
@@ -734,10 +757,10 @@ def backward(self, dz, z):

dz = np.split(dz, len(tensors), dim)

# Find gradients relative to each tensor in "tensor", and make recursive calls if it requires gradients:
# Find gradients relative to each tensor in "tensors", and pass them downstream:
for i, tensor in enumerate(tensors):
if tensor.requires_grad:

# For every tensor that generated the output, get gradients relative to that part of "dz":
di = dz[i]

tensor.backward(di, z)
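
A tiny standalone example (plain NumPy, equal-sized inputs, mirroring the np.split call above) of how the gradient of a concatenation is split back to its source tensors:

import numpy as np

t1 = np.random.randn(2, 3)
t2 = np.random.randn(2, 3)
z = np.concatenate([t1, t2], axis=0)      # shape (4, 3)

dz = np.ones_like(z)                      # upstream gradient of the concatenation
d1, d2 = np.split(dz, 2, axis=0)          # each chunk flows back to its source
assert d1.shape == t1.shape and d2.shape == t2.shape
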
@@ -773,7 +796,7 @@ def backward(self, dz, z):

dz = np.split(dz, len(tensors), dim)

# Find gradients relative to each tensor in "tensor", and make recursive calls if it requires gradients:
# Find gradients relative to each tensor in "tensors", and pass them downstream:
for i, tensor in enumerate(tensors):
if tensor.requires_grad:
# For every tensor that generated the stack, get gradients relative to that part of "dz":
@@ -804,7 +827,7 @@ def forward(self, a, condition, value):
def backward(self, dz, z):
a = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Because some activations are just set to a value, this operation is not differentiable.
da = dz
@@ -834,7 +857,7 @@ def forward(self, a, index):
def backward(self, dz, z):
a, index = self.cache

# Find gradients relative to "a", and make recursive calls if it requires gradients:
# Find gradients relative to "a", and pass it downstream:
if a.requires_grad:
# Add upstream gradients to [index] part of da.
da = np.zeros_like(a._data)
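
One way to picture the slice backward pass (a plain-NumPy sketch, not necessarily the exact code hidden below): the upstream gradient is scattered into a zero tensor at the indexed positions.

import numpy as np

a = np.random.randn(5)
index = np.array([1, 3])
dz = np.array([10., 20.])                 # upstream gradient of a[index]

da = np.zeros_like(a)
np.add.at(da, index, dz)                  # accumulate dz at the sliced positions
# da == [0., 10., 0., 20., 0.]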
