Rename a few methods to C# PascalCase (#6)
nietras authored Apr 16, 2024
1 parent 07ea337 commit a7e55f7
Showing 6 changed files with 118 additions and 115 deletions.
30 changes: 15 additions & 15 deletions README.md
@@ -35,21 +35,21 @@ namespace nietras.LargeLanguageModel
 {
     public static class Llm
     {
-        public static unsafe void attention_backward(float* dinp, float* dpreatt, float* datt, float* dout, float* inp, float* att, int B, int T, int C, int NH) { }
-        public static unsafe void attention_forward(float* output, float* preatt, float* att, float* inp, int B, int T, int C, int NH) { }
-        public static unsafe void crossentropy_forward(float* losses, float* probs, int* targets, int B, int T, int V) { }
-        public static unsafe void crossentropy_softmax_backward(float* dlogits, float* dlosses, float* probs, int* targets, int B, int T, int V) { }
-        public static unsafe void encoder_backward(float* dwte, float* dwpe, float* dout, int* inp, int B, int T, int C) { }
-        public static unsafe void encoder_forward(float* output, int* inp, float* wte, float* wpe, int B, int T, int C) { }
-        public static unsafe void gelu_backward(float* dinp, float* inp, float* dout, int N) { }
-        public static unsafe void gelu_forward(float* output, float* inp, int N) { }
-        public static unsafe void layernorm_backward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, float* mean, float* rstd, int B, int T, int C) { }
-        public static unsafe void layernorm_forward(float* output, float* mean, float* rstd, float* inp, float* weight, float* bias, int B, int T, int C) { }
-        public static unsafe void matmul_backward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, int B, int T, int C, int OC) { }
-        public static unsafe void matmul_forward(float* output, float* inp, float* weight, float* bias, int B, int T, int C, int OC) { }
-        public static unsafe void residual_backward(float* dinp1, float* dinp2, float* dout, int N) { }
-        public static unsafe void residual_forward(float* output, float* inp1, float* inp2, int N) { }
-        public static unsafe void softmax_forward(float* probs, float* logits, int B, int T, int V) { }
+        public static unsafe void AttentionBackward(float* dinp, float* dpreatt, float* datt, float* dout, float* inp, float* att, int B, int T, int C, int NH) { }
+        public static unsafe void AttentionForward(float* output, float* preatt, float* att, float* inp, int B, int T, int C, int NH) { }
+        public static unsafe void CrossEntropyForward(float* losses, float* probs, int* targets, int B, int T, int V) { }
+        public static unsafe void CrossEntropySoftmaxBackward(float* dlogits, float* dlosses, float* probs, int* targets, int B, int T, int V) { }
+        public static unsafe void EncoderBackward(float* dwte, float* dwpe, float* dout, int* inp, int B, int T, int C) { }
+        public static unsafe void EncoderForward(float* output, int* inp, float* wte, float* wpe, int B, int T, int C) { }
+        public static unsafe void GeLUBackward(float* dinp, float* inp, float* dout, int N) { }
+        public static unsafe void GeLUForward(float* output, float* inp, int N) { }
+        public static unsafe void LayerNormBackward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, float* mean, float* rstd, int B, int T, int C) { }
+        public static unsafe void LayerNormForward(float* output, float* mean, float* rstd, float* inp, float* weight, float* bias, int B, int T, int C) { }
+        public static unsafe void MatMulBackward(float* dinp, float* dweight, float* dbias, float* dout, float* inp, float* weight, int B, int T, int C, int OC) { }
+        public static unsafe void MatMulForward(float* output, float* inp, float* weight, float* bias, int B, int T, int C, int OC) { }
+        public static unsafe void ResidualBackward(float* dinp1, float* dinp2, float* dout, int N) { }
+        public static unsafe void ResidualForward(float* output, float* inp1, float* inp2, int N) { }
+        public static unsafe void SoftmaxForward(float* probs, float* logits, int B, int T, int V) { }
     }
 }
 ```
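After this rename, call sites use the PascalCase names. A minimal sketch of invoking one of the renamed entry points; the buffer contents and size below are illustrative only, not from the repository:

```csharp
using nietras.LargeLanguageModel;

unsafe
{
    // Apply GELU element-wise to a small illustrative buffer via the renamed API.
    const int N = 4;
    float* inp = stackalloc float[N] { -1f, 0f, 1f, 2f };
    float* output = stackalloc float[N];
    Llm.GeLUForward(output, inp, N); // was gelu_forward before this commit
}
```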
31 changes: 30 additions & 1 deletion src/Llm/Extensions.cs
@@ -1,9 +1,38 @@
-using System.Collections.Generic;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 
 namespace nietras.LargeLanguageModel;
 
 internal static class Extensions
 {
+    public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, Span<T> values)
+        where T : unmanaged
+    {
+        fixed (T* ptr = values)
+        {
+            ReadExactlyUnmanaged(file, ptr, values.Length);
+        }
+    }
+
+    public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, T* values, long count)
+        where T : unmanaged
+    {
+        Span<T> buffer = stackalloc T[(256 * 1024) / Unsafe.SizeOf<T>()];
+        var totalReadCount = 0;
+        while (totalReadCount < count)
+        {
+            var countToRead = (int)Math.Min(buffer.Length, count - totalReadCount);
+            var bufferToRead = buffer.Slice(0, countToRead);
+            var span = MemoryMarshal.Cast<T, byte>(bufferToRead);
+            file.ReadExactly(span);
+            bufferToRead.CopyTo(new Span<T>(values + totalReadCount, countToRead));
+            totalReadCount += countToRead;
+        }
+    }
+
     public static IEnumerable<(int i0, int i1)> Enumerate(int count0, int count1)
     {
         for (var i0 = 0; i0 < count0; i0++)
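The new ReadExactlyUnmanaged overloads stream file bytes into unmanaged memory through a 256 KiB stack buffer, so a large tensor can be read without allocating a managed array of its full size. A minimal usage sketch, callable from inside the same assembly since the extension is internal; the file name and element count are illustrative:

```csharp
using System.IO;
using System.Runtime.InteropServices;

unsafe
{
    const int count = 1024; // illustrative element count
    float* values = (float*)NativeMemory.Alloc((nuint)(count * sizeof(float)));
    using var file = File.OpenRead("gpt2_124M.bin"); // checkpoint file used by the tests
    file.ReadExactlyUnmanaged(values, count);        // copies through the stack buffer chunk by chunk
    NativeMemory.Free(values);
}
```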
14 changes: 7 additions & 7 deletions src/Llm/Gpt2.Test.cs
@@ -13,7 +13,7 @@ public static unsafe void Test()
     var dataDirectory = Path.Combine(location!, "../../../");
     // build the GPT-2 model from a checkpoint
     GPT2 model;
-    gpt2_build_from_checkpoint(&model, dataDirectory + "gpt2_124M.bin");
+    BuildFromCheckpoint(&model, dataDirectory + "gpt2_124M.bin");
 
     int C = model.config.channels;
     int V = model.config.vocab_size;
@@ -35,7 +35,7 @@ public static unsafe void Test()
     Log($"seq_len: {T}");
 
     ParameterTensors expected_grads;
-    float* expected_grads_memory = malloc_and_point_parameters(&expected_grads, model.param_sizes);
+    float* expected_grads_memory = AllocateAndPointParameters(&expected_grads, model.param_sizes);
 
     // inputs and expected outputs, only used for error checking
     int* x = malloc<int>(B * T);
@@ -61,9 +61,9 @@ public static unsafe void Test()
     {
         stopwatch.Restart();
 
-        gpt2_forward(&model, x, y, B, T);
-        gpt2_zero_grad(&model);
-        gpt2_backward(&model);
+        Forward(&model, x, y, B, T);
+        ZeroGrad(&model);
+        Backward(&model);
 
         double time_elapsed_s = stopwatch.Elapsed.TotalSeconds;
 
@@ -126,7 +126,7 @@ public static unsafe void Test()
            }
        }
 
-        gpt2_update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);
+        Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.01f, step + 1);
 
        // print the timing information at the end
        Log($"step {step}: loss {model.mean_loss} (took {time_elapsed_s * 1000} ms)");
@@ -168,7 +168,7 @@ public static unsafe void Test()
    free(expected_logits);
    free(expected_loss);
    free(expected_grads_memory);
-    gpt2_free(&model);
+    Free(&model);
 }
 
 // poor man's tensor checker
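The "poor man's tensor checker" referenced above is collapsed in this view; judging by the llm.c code this port follows, it compares each element of an actual tensor against expected values within a tolerance. An illustrative sketch of such a check; the name and tolerance are assumptions, not the repository's exact code:

```csharp
using System;

// Compare two float buffers element-wise; report one OK/NOT OK line per tensor.
static unsafe bool CheckTensor(float* actual, float* expected, int n, string label)
{
    const float tolerance = 1e-2f; // assumed tolerance
    var ok = true;
    for (var i = 0; i < n; i++)
    {
        if (MathF.Abs(actual[i] - expected[i]) > tolerance) { ok = false; }
    }
    Console.WriteLine($"{label}: {(ok ? "OK" : "NOT OK")}");
    return ok;
}
```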
16 changes: 8 additions & 8 deletions src/Llm/Gpt2.Train.cs
@@ -14,7 +14,7 @@ public static unsafe void Train()
     var dataDirectory = Path.Combine(location!, "../../../");
     // build the GPT-2 model from a checkpoint
     GPT2 model;
-    gpt2_build_from_checkpoint(&model, dataDirectory + "gpt2_124M.bin");
+    BuildFromCheckpoint(&model, dataDirectory + "gpt2_124M.bin");
 
     // build the DataLoaders from tokens files. for now use tiny_shakespeare if available, else tiny_stories
     var tiny_stories_train = dataDirectory + "TinyStories_train.bin";
@@ -51,7 +51,7 @@ public static unsafe void Train()
     for (int i = 0; i < val_num_batches; i++)
     {
         val_loader.dataloader_next_batch();
-        gpt2_forward(&model, val_loader.inputs, val_loader.targets, B, T);
+        Forward(&model, val_loader.inputs, val_loader.targets, B, T);
         val_loss += model.mean_loss;
     }
     val_loss /= val_num_batches;
@@ -68,7 +68,7 @@ public static unsafe void Train()
     // for each t, we re-compute all activations between 0 and t
     // leaving this alone because you want separate code for inference anyway
     // the inference here is just for sanity checking purposes
-    gpt2_forward(&model, gen_tokens, null, 1, t);
+    Forward(&model, gen_tokens, null, 1, t);
     float* probs = model.acts.probs + (t - 1) * model.config.vocab_size;
     float coin = random_f32(&rng_state);
     int next_token = sample_mult(probs, model.config.vocab_size, coin);
@@ -85,15 +85,15 @@ public static unsafe void Train()
     // do a training step
     stopwatch.Restart();
     train_loader.dataloader_next_batch();
-    gpt2_forward(&model, train_loader.inputs, train_loader.targets, B, T);
-    gpt2_zero_grad(&model);
-    gpt2_backward(&model);
-    gpt2_update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.0f, step + 1);
+    Forward(&model, train_loader.inputs, train_loader.targets, B, T);
+    ZeroGrad(&model);
+    Backward(&model);
+    Update(&model, 1e-4f, 0.9f, 0.999f, 1e-8f, 0.0f, step + 1);
     double time_elapsed_ms = stopwatch.Elapsed.TotalMilliseconds;
     Log($"step {step}: train loss {model.mean_loss} (took {time_elapsed_ms} ms)");
 }
 
 // free
-gpt2_free(&model);
+Free(&model);
 }
 }
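The renamed Update takes (learningRate, beta1, beta2, eps, weightDecay, step), the signature of the AdamW optimizer used by the llm.c code this port follows. An illustrative sketch of one AdamW step over a flat parameter buffer; all names here are assumptions, not the repository's internals:

```csharp
using System;

// m and v are per-parameter first/second moment buffers persisted across steps.
static void AdamWStep(Span<float> parameters, ReadOnlySpan<float> grads,
    Span<float> m, Span<float> v,
    float learningRate, float beta1, float beta2, float eps, float weightDecay, int step)
{
    for (var i = 0; i < parameters.Length; i++)
    {
        var g = grads[i];
        m[i] = beta1 * m[i] + (1f - beta1) * g;          // moving average of gradients
        v[i] = beta2 * v[i] + (1f - beta2) * g * g;      // ... and of squared gradients
        var mHat = m[i] / (1f - MathF.Pow(beta1, step)); // bias correction for early steps
        var vHat = v[i] / (1f - MathF.Pow(beta2, step));
        parameters[i] -= learningRate *
            (mHat / (MathF.Sqrt(vHat) + eps) + weightDecay * parameters[i]);
    }
}
```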
(Diffs for the remaining 2 of the 6 changed files are not shown.)