small tweaks to support the Apple Silicon M1 chip via the 'mps' device. This is not yet faster because many ops are still being implemented (https://github.com/pytorch/pytorch/issues/77764); in particular, as of today, the layernorm backward that we need.
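For context, here is a minimal sketch of how such a device flag can be honored in PyTorch, assuming a recent build with MPS support; the fallback logic and variable names are illustrative, not code from this commit:

import torch

# prefer the Apple Silicon GPU backend when PyTorch was built with MPS support,
# otherwise fall back to CPU; the 'mps' device string is used just like 'cuda' or 'cpu'
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

x = torch.randn(4, 8, device=device)  # tensors can be created directly on the chosen device
print(x.device)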

morelm
Andrej Karpathy 2022-06-09 12:59:39 -07:00
parent 8f79bd0126
commit e0a08f234c
1 changed file with 2 additions and 2 deletions


@@ -372,7 +372,7 @@ if __name__ == '__main__':
parser.add_argument('--work-dir', '-o', type=str, default='out', help="output working directory")
parser.add_argument('--resume', action='store_true', help="when this flag is used, we will resume optimization from existing model in the workdir")
parser.add_argument('--num-workers', '-n', type=int, default=1, help="number of data workers for both train/test")
- parser.add_argument('--device', type=str, default='cpu', help="device to use for compute, e.g. cpu|cuda|m1")
+ parser.add_argument('--device', type=str, default='cpu', help="device to use for compute, e.g. cpu|cuda|mps")
parser.add_argument('--seed', type=int, default=1337, help="seed")
# sampling
parser.add_argument('--sample-only', action='store_true', help="just sample from the model and quit, don't train")
@@ -446,7 +446,7 @@ if __name__ == '__main__':
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
- if args.device != 'cpu':
+ if args.device == 'cuda':
torch.cuda.synchronize()
t1 = time.time()
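The second hunk narrows the synchronization guard to CUDA only, so that torch.cuda.synchronize() is not called when the device is 'mps' or 'cpu'. A minimal sketch of timing a training step this way, assuming hypothetical model/optimizer/loss objects rather than the ones in this script:

import time
import torch

def timed_step(model, optimizer, loss, device):
    # hypothetical helper for illustration: backprop, clip gradients, step, return elapsed seconds
    t0 = time.time()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()
    if device == 'cuda':
        # CUDA kernels launch asynchronously, so wait for them to finish before
        # reading the clock; otherwise only the kernel launches get timed
        torch.cuda.synchronize()
    return time.time() - t0

More recent PyTorch releases also expose torch.mps.synchronize() for the same purpose on Apple Silicon.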