From 97132d6465d28c17d74401ed72d44549522a019e Mon Sep 17 00:00:00 2001
From: Sourav Chakraborty <souravzzz@yahoo.co.in>
Date: Thu, 31 Mar 2022 10:03:52 -0500
Subject: [PATCH] Allow benchmark to run on more than 4 nodes

dist.all_to_all_single(t, t) will fail if the length of t is less than the number of nodes. Increasing it to 1024 allows it to run on up to 1024 nodes.
---
 extend_distributed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extend_distributed.py b/extend_distributed.py
index 1f2c8a53..c6b2b9c9 100644
--- a/extend_distributed.py
+++ b/extend_distributed.py
@@ -164,7 +164,7 @@ def init_distributed(rank=-1, local_rank=-1, size=-1, use_gpu=False, backend="")
             print("Running on %d ranks using %s backend" % (my_size, backend))
         if hasattr(dist, "all_to_all_single"):
             try:
-                t = torch.zeros([4])
+                t = torch.zeros([1024])
                 if use_gpu:
                     t = t.cuda()
                 dist.all_to_all_single(t, t)