update readme

lucidrains · Jul 17, 2020 · ea97fb3
1 parent a48cd5f
commit ea97fb3
Showing 1 changed file with 8 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -18,10 +18,10 @@ from mixture_of_experts import MoE
 inputs = torch.randn(4, 1024, 512)
 
 experts = MoE(
-	dim = 512,
-	num_experts = 16,  			# increase the experts (# parameters) of your model without increasing computation
-	hidden_dim = 512 * 4, 		# size of hidden dimension in each expert, defaults to 4 * dimension
-	activation = nn.LeakyReLU   # use your preferred activation, will default to ReLU
+    dim = 512,
+    num_experts = 16,           # increase the experts (# parameters) of your model without increasing computation
+    hidden_dim = 512 * 4,       # size of hidden dimension in each expert, defaults to 4 * dimension
+    activation = nn.LeakyReLU   # use your preferred activation, will default to ReLU
 )
 
 out, aux_loss = experts(inputs) # (4, 1024, 512), (1,)
@@ -31,10 +31,10 @@ out, aux_loss = experts(inputs) # (4, 1024, 512), (1,)
 
 ```bibtex
 @misc{lepikhin2020gshard,
-    title 	= {GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding},
-    author 	= {Dmitry Lepikhin and HyoukJoong Lee and Yuanzhong Xu and Dehao Chen and Orhan Firat and Yanping Huang and Maxim Krikun and Noam Shazeer and Zhifeng Chen},
-    year 	= {2020},
-    eprint 	= {2006.16668},
+    title   = {GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding},
+    author  = {Dmitry Lepikhin and HyoukJoong Lee and Yuanzhong Xu and Dehao Chen and Orhan Firat and Yanping Huang and Maxim Krikun and Noam Shazeer and Zhifeng Chen},
+    year    = {2020},
+    eprint  = {2006.16668},
     archivePrefix = {arXiv},
     primaryClass = {cs.CL}
 }