small tweaks to support the Apple Silicon M1 chip device 'mps'. But this is not yet faster, because a lot of ops are still being implemented (pytorch/pytorch#77764); in particular, for us, the layernorm backward as of today.
karpathy committed Jun 9, 2022
1 parent 8f79bd0 commit e0a08f2
Showing 1 changed file with 2 additions and 2 deletions.
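For context on the "ops still being implemented" caveat in the message: pytorch/pytorch#77764 is the MPS operator coverage tracking issue, and PyTorch provides an escape hatch, the PYTORCH_ENABLE_MPS_FALLBACK environment variable, which routes ops the MPS backend does not implement yet to the CPU instead of raising an error. A minimal sketch of enabling it from Python (not part of this commit):

import os
# route not-yet-implemented MPS ops to the CPU instead of erroring out;
# setting this before torch is imported is the safe ordering
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")

import torch  # imported after the variable is set

The fallback trades a hard error for a possibly slow CPU round-trip, which fits the commit's observation that mps is not yet faster here.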
makemore.py (4 changes: 2 additions & 2 deletions)
@@ -372,7 +372,7 @@ def create_datasets(input_file):
     parser.add_argument('--work-dir', '-o', type=str, default='out', help="output working directory")
     parser.add_argument('--resume', action='store_true', help="when this flag is used, we will resume optimization from existing model in the workdir")
     parser.add_argument('--num-workers', '-n', type=int, default=1, help="number of data workers for both train/test")
-    parser.add_argument('--device', type=str, default='cpu', help="device to use for compute, e.g. cpu|cuda|m1")
+    parser.add_argument('--device', type=str, default='cpu', help="device to use for compute, e.g. cpu|cuda|mps")
     parser.add_argument('--seed', type=int, default=1337, help="seed")
     # sampling
     parser.add_argument('--sample-only', action='store_true', help="just sample from the model and quit, don't train")
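
This first hunk only corrects the documented option name: PyTorch's Apple Silicon backend is called 'mps' (Metal Performance Shaders), not 'm1'. As a minimal sketch (not part of the commit) of validating that string before use, where resolve_device is a hypothetical helper built on availability checks that exist in PyTorch 1.12+:

import torch

def resolve_device(requested: str) -> torch.device:
    # fall back to cpu when the requested accelerator is not usable on this machine
    if requested == 'cuda' and not torch.cuda.is_available():
        print("cuda not available, falling back to cpu")
        return torch.device('cpu')
    if requested == 'mps' and not torch.backends.mps.is_available():
        print("mps not available, falling back to cpu")
        return torch.device('cpu')
    return torch.device(requested)

Calling device = resolve_device(args.device) once, right after argument parsing, would surface a bad or unavailable device immediately rather than at the first tensor allocation.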
@@ -446,7 +446,7 @@ def create_datasets(input_file):
         loss.backward()
         torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
         optimizer.step()
-        if args.device != 'cpu':
+        if args.device == 'cuda':
             torch.cuda.synchronize()
         t1 = time.time()
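
The second hunk tightens the timing guard. Now that 'mps' is an accepted device, "not cpu" no longer implies "cuda", and torch.cuda.synchronize() is a CUDA-only call, so the commit synchronizes only when the device is 'cuda'. The synchronize matters because GPU kernels launch asynchronously: without it, t1 - t0 can be measured before the step's kernels have actually finished. A device-aware variant (a sketch, not from the commit; torch.mps.synchronize() only exists in PyTorch releases newer than the one this commit targets) might look like:

import time
import torch

def sync(device: str) -> None:
    # GPU work is launched asynchronously; block until it completes so the
    # wall-clock timing below measures the step rather than the kernel launch
    if device == 'cuda':
        torch.cuda.synchronize()
    elif device == 'mps':
        torch.mps.synchronize()  # assumption: available in newer PyTorch releases

device = 'cpu'  # or 'cuda' / 'mps' where available
t0 = time.time()
x = torch.randn(512, 512, device=device)
(x @ x).sum()  # some work to time
sync(device)
t1 = time.time()
print(f"step took {(t1 - t0) * 1000:.2f} ms")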

