Prepare for detailed timings and benchmarking (#17)
nietras authored Jun 30, 2024
1 parent 2779da9 commit eb389b9
Showing 19 changed files with 730 additions and 565 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -45,7 +45,7 @@
[State]
batch_size: 4
seq_len: 64
num_activations: 73323776
activationCount: 73323776
Logits TENSOR OK
dwte TENSOR OK
dwpe TENSOR OK
@@ -94,7 +94,7 @@ namespace nietras.LargeLanguageModel
{
public interface ILlm
{
unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, long parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t);
unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, System.IntPtr parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t);
unsafe void AttentionBackward(float* δoutput, float* postAttention, float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* δpreAttention, float* δpostAttention, float* δinput);
unsafe void AttentionForward(float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* preAttention, float* postAttention, float* output);
unsafe void CrossEntropyForward(float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* losses);
@@ -114,7 +114,7 @@ namespace nietras.LargeLanguageModel
public class Llm : nietras.LargeLanguageModel.ILlm
{
public Llm() { }
public virtual unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, long parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t) { }
public virtual unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, System.IntPtr parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t) { }
public virtual unsafe void AttentionBackward(float* δoutput, float* postAttention, float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* δpreAttention, float* δpostAttention, float* δinput) { }
public virtual unsafe void AttentionForward(float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* preAttention, float* postAttention, float* output) { }
public virtual unsafe void CrossEntropyForward(float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* losses) { }
@@ -139,7 +139,7 @@ namespace nietras.LargeLanguageModel
public class Llm_nietras : nietras.LargeLanguageModel.Llm
{
public Llm_nietras() { }
public override unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, long parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t) { }
public override unsafe void AdamW(float* gradients, float* ms, float* vs, float* parameters, System.IntPtr parameterCount, float learningRate, float beta1, float beta2, float eps, float weightDecay, int t) { }
public override unsafe void AttentionBackward(float* δoutput, float* postAttention, float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* δpreAttention, float* δpostAttention, float* δinput) { }
public override unsafe void GeLUBackward(float* δoutput, float* input, int count, float* δinput) { }
public override unsafe void GeLUForward(float* input, int count, float* output) { }
6 changes: 5 additions & 1 deletion bench.ps1
@@ -1 +1,5 @@
dotnet run -c Release -f net8.0 --project src\Sep.Benchmarks\Sep.Benchmarks.csproj -- -m --warmupCount 5 --minIterationCount 3 --maxIterationCount 9 --runtimes net80 --iterationTime 300
#!/usr/bin/env pwsh
param (
[string]$filter = "*"
)
dotnet run -c Release -f net8.0 --project src\Llm.Benchmarks\Llm.Benchmarks.csproj -- -m --warmupCount 1 --minIterationCount 1 --maxIterationCount 3 --runtimes net80 --iterationTime 300 --hide Method --filter $filter
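
Usage note (an assumption, not part of the commit): with pwsh 7+ from the repository root, ./bench.ps1 -filter *Gpt2Bench* forwards the filter value to BenchmarkDotNet's --filter glob and restricts the run to that benchmark class; the default value of * runs every benchmark.
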
1 change: 1 addition & 0 deletions run.ps1
@@ -0,0 +1 @@
dotnet run -c Release -f net8.0 --project src\Llm\Llm.csproj
46 changes: 46 additions & 0 deletions src/Llm.Benchmarks/BenchmarkDotNetExtensions.cs
@@ -0,0 +1,46 @@
using System.Globalization;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Exporters;
using BenchmarkDotNet.Exporters.Csv;
using BenchmarkDotNet.Loggers;
using BenchmarkDotNet.Reports;
using Perfolizer.Horology;

namespace nietras.LargeLanguageModel.Benchmarks;

static class BenchmarkDotNetExtensions
{
public static void ExportToFile(this IExporter exporter, Summary summary, string filePath)
{
using var logger = new StreamLogger(filePath);
exporter.ExportToLog(summary, logger);
}

}

class CustomMarkdownExporter : MarkdownExporter
{
public CustomMarkdownExporter()
{
Dialect = "GitHub";
UseCodeBlocks = true;
CodeBlockStart = "```";
StartOfGroupHighlightStrategy = MarkdownHighlightStrategy.None;
ColumnsStartWithSeparator = true;
EscapeHtml = true;
}
}

class CustomCsvExporter : CsvExporter
{
public CustomCsvExporter()
: base(CsvSeparator.Semicolon, new SummaryStyle(
cultureInfo: CultureInfo.InvariantCulture,
printUnitsInHeader: true,
printUnitsInContent: false,
timeUnit: TimeUnit.Millisecond,
sizeUnit: SizeUnit.KB
))
{ }
}

68 changes: 68 additions & 0 deletions src/Llm.Benchmarks/Gpt2Bench.cs
@@ -0,0 +1,68 @@
using System.Collections.Generic;
using System.Diagnostics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Order;
using static nietras.LargeLanguageModel.Gpt2;

namespace nietras.LargeLanguageModel.Benchmarks;

[MemoryDiagnoser]
[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByMethod)]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
#if DEBUG
[WarmupCount(1)]
[MinIterationCount(2)]
[MaxIterationCount(3)]
#endif
public class Gpt2Bench
{
const string DataDirectory = "../../../";
GPT2 _model = new();
ExpectedTensors _expectedInputsOutputs;
ParameterTensors _expectedGrads;
TimeLlm? _llm;
int _step;

[ParamsSource(nameof(NameParams))] // Attributes for params is challenging 👇
public string Name { get; set; } = nameof(Llm);
public static IEnumerable<string> NameParams() => LlmFactory.NameToCreate.Keys;

[GlobalSetup]
public void GlobalSetup()
{
Runner.DownloadBinaryFilesIfNotExists(Gpt2.FileNames, Gpt2.RemoteUrl,
DataDirectory, t => Trace.WriteLine(t));

// build the GPT-2 model from a checkpoint
BuildFromCheckpoint(ref _model, DataDirectory + ModelBinaryFileName);

(_expectedInputsOutputs, _expectedGrads) = ReadExpectedState(_model, DataDirectory);

var llm = LlmFactory.NameToCreate[Name]();
_llm = new TimeLlm(llm);
_step = 0;
}

[Benchmark]
public unsafe float Train()
{
var (loss, t) = TrainStep(ref _model,
_expectedInputsOutputs.InputTokenIndices, _expectedInputsOutputs.OutputTokenIndices,
_expectedInputsOutputs.BatchSize, _expectedInputsOutputs.TokenCount,
_llm!, _step);
++_step;
return loss;
}

[GlobalCleanup]
public unsafe void GlobalCleanup()
{
free(_expectedInputsOutputs.InputTokenIndices);
free(_expectedInputsOutputs.OutputTokenIndices);
free(_expectedInputsOutputs.ExpectedLogits);
free(_expectedInputsOutputs.ExpectedLoss);
free(_expectedGrads.MemoryPtr);
Free(ref _model);
}
}
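
For context, LlmFactory and TimeLlm are referenced above but are not part of this diff. Below is a minimal sketch of the shape that Gpt2Bench's usage implies for LlmFactory.NameToCreate — the member names come from the benchmark code, but the body is an assumption, not the repository's implementation.

using System;
using System.Collections.Generic;

namespace nietras.LargeLanguageModel;

// Hypothetical sketch only: Gpt2Bench needs a name -> factory map whose keys
// seed the [ParamsSource] Name values and whose delegates create the ILlm
// instance that TimeLlm wraps for per-step timing.
public static class LlmFactory
{
    public static readonly IReadOnlyDictionary<string, Func<ILlm>> NameToCreate =
        new Dictionary<string, Func<ILlm>>
        {
            { nameof(Llm), () => new Llm() },
            { nameof(Llm_nietras), () => new Llm_nietras() },
        };
}
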
1 change: 1 addition & 0 deletions src/Llm.Benchmarks/Llm.Benchmarks.csproj
@@ -20,6 +20,7 @@
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.13.12" />
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.13.12" />
<PackageReference Include="Sep" Version="0.5.2" />
</ItemGroup>

</Project>
17 changes: 0 additions & 17 deletions src/Llm.Benchmarks/LlmBench.cs

This file was deleted.

106 changes: 96 additions & 10 deletions src/Llm.Benchmarks/Program.cs
@@ -1,34 +1,120 @@
// Type 'Program' can be sealed because it has no subtypes in its containing assembly and is not externally visible
#pragma warning disable CA1852
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using BenchmarkDotNet.Columns;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Loggers;
using BenchmarkDotNet.Reports;
using BenchmarkDotNet.Running;
using nietras.LargeLanguageModel.Benchmarks;
using nietras.SeparatedValues;
[assembly: System.Runtime.InteropServices.ComVisible(false)]

Action<string> log = t => { Console.WriteLine(t); Trace.WriteLine(t); };

log($"{Environment.Version} args: {args.Length}");

if (args.Length > 0)
if (true || args.Length > 0)
{
var config = (Debugger.IsAttached ? new DebugInProcessConfig() : DefaultConfig.Instance)
.WithSummaryStyle(SummaryStyle.Default.WithMaxParameterColumnWidth(200))
//.AddColumn(MBPerSecFromCharsLength())
;
BenchmarkRunner.Run(typeof(LlmBench), config, args);
var name = nameof(Gpt2Bench);

var markdownExporter = new CustomMarkdownExporter();
var csvExporter = new CustomCsvExporter();
var config = //(Debugger.IsAttached ? new DebugInProcessConfig() : DefaultConfig.Instance)
ManualConfig.CreateEmpty()
.AddColumnProvider(DefaultColumnProviders.Instance)
.AddExporter(markdownExporter)
.AddExporter(csvExporter)
.AddLogger(ConsoleLogger.Default)
.WithSummaryStyle(SummaryStyle.Default.WithMaxParameterColumnWidth(200));

var summary = BenchmarkRunner.Run(typeof(Gpt2Bench), config, args);

var cpuInfo = summary.HostEnvironmentInfo.CpuInfo.Value;
var processorName = ProcessorBrandStringHelper.Prettify(cpuInfo);
var processorNameInDirectory = processorName
.Replace(" Processor", "").Replace(" CPU", "")
.Replace(" Graphics", "")
.Replace("/", "").Replace("\\", "")
.Replace(" ", ".");
log(processorName);

//var processorNameInDirectory = "AMD.Ryzen.7.PRO.7840U.w.Radeon.780M";

var sourceDirectory = GetSourceDirectory();
var benchmarksDirectory = $"{sourceDirectory}/../../benchmarks/";
var directory = $"{benchmarksDirectory}{processorNameInDirectory}";
if (!Directory.Exists(directory)) { Directory.CreateDirectory(directory); }

var filePathMd = Path.Combine(directory, $"{name}.md");
var filePathCsv = Path.Combine(directory, $"{name}.csv");

markdownExporter.ExportToFile(summary, filePathMd);
csvExporter.ExportToFile(summary, filePathCsv);

var filePathBoard = Path.Combine(directory, $"{name}-Board.csv");

UpdateBoardCsv(filePathCsv, filePathBoard);
}
else
{
var b = new LlmBench();
var b = new Gpt2Bench();
b.GlobalSetup();
b.Naive();
b.Train();
Thread.Sleep(200);
for (var i = 0; i < 200000000; i++)
for (var i = 0; i < 9; i++)
{
b.Naive();
b.Train();
}
b.GlobalCleanup();
}

static string GetSourceDirectory([CallerFilePath] string filePath = "") =>
Path.GetDirectoryName(filePath)!;

static void UpdateBoardCsv(string filePathCsv, string filePathBoard)
{
const string colNameName = "Name";
const string colNameMean = "Mean [ms]";

string[] colNames = [colNameName, colNameMean, "StdDev [ms]", "Allocated [KB]"];

var nameToCols = ReadNameToCols(filePathCsv, colNameName, colNameMean, colNames);
if (File.Exists(filePathBoard))
{
var nameToColsBoard = ReadNameToCols(filePathBoard, colNameName, colNameMean, colNames);
foreach (var (n, v) in nameToColsBoard)
{
if (!nameToCols.ContainsKey(n))
{
nameToCols[n] = v;
}
}
}

using var writerBoard = Sep.Writer().ToFile(filePathBoard);
var sorted = nameToCols.Values.OrderBy(v => v.Mean);
foreach (var (_, cols) in sorted)
{
using var writeRow = writerBoard.NewRow();
writeRow[colNames].Set(cols);
}
}

static Dictionary<string, (double Mean, string[] Cols)> ReadNameToCols(
string filePath, string colNameName, string colNameMean, string[] colNames)
{
using var reader = Sep
.Reader(o => o with { Unescape = true, DisableFastFloat = true })
.FromFile(filePath);
return reader.Enumerate(r => (Name: r[colNameName].ToString(),
Mean: r[colNameMean].Parse<double>(), Cols: r[colNames].ToStringsArray()))
.ToDictionary(t => t.Name, t => (t.Mean, t.Cols));
}
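
For reference, the board file maintained by UpdateBoardCsv is written with Sep (semicolon-separated by its default) and contains exactly the four columns in colNames, one row per implementation name, ordered fastest first; a row from the current run replaces any previous row with the same name, while rows for implementations not benchmarked in this run are preserved. A sketch of its shape, with placeholders rather than measured values:

Name;Mean [ms];StdDev [ms];Allocated [KB]
<implementation name>;<mean>;<stddev>;<allocated>
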
28 changes: 1 addition & 27 deletions src/Llm/Extensions.cs
@@ -1,5 +1,4 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
@@ -17,7 +16,7 @@ public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, Span<T>
}
}

public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, T* values, long count)
public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, T* values, nint count)
where T : unmanaged
{
Span<T> buffer = stackalloc T[(256 * 1024) / Unsafe.SizeOf<T>()];
@@ -32,29 +31,4 @@ public static unsafe void ReadExactlyUnmanaged<T>(this FileStream file, T* value
totalReadCount += countToRead;
}
}

public static IEnumerable<(int i0, int i1)> Enumerate(int count0, int count1)
{
for (var i0 = 0; i0 < count0; i0++)
{
for (var i1 = 0; i1 < count1; i1++)
{
yield return (i0, i1);
}
}
}

public static IEnumerable<(int i0, int i1, int i2)> Enumerate(int count0, int count1, int count2)
{
for (var i0 = 0; i0 < count0; i0++)
{
for (var i1 = 0; i1 < count1; i1++)
{
for (var i2 = 0; i2 < count2; i2++)
{
yield return (i0, i1, i2);
}
}
}
}
}
