
Commit

weighting is already done when computing combine_tensor
lucidrains committed Aug 21, 2023
1 parent 0b6bf96 commit 4a073bc
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 0 additions & 2 deletions mixture_of_experts/mixture_of_experts.py
@@ -199,15 +199,13 @@ def forward(self, x, importance = None):
     # [batch, group]
     position_in_expert_1 = position_in_expert_1.sum(dim=-1)
     # Weight assigned to first expert. [batch, group]
-    gate_1 *= mask_1_flat

     position_in_expert_2 = cumsum_exclusive(mask_2, dim=-2) + mask_1_count
     position_in_expert_2 *= mask_2
     mask_2 *= (position_in_expert_2 < expert_capacity_f).float()
     mask_2_flat = mask_2.sum(dim=-1)

     position_in_expert_2 = position_in_expert_2.sum(dim=-1)
-    gate_2 *= mask_2_flat

     # [batch, group, experts, expert_capacity]
     combine_tensor = (
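For context, the two deleted lines are redundant because the combine weights already apply the capacity masks. A minimal sketch of tensor2tensor-style top-2 gating, showing only the first expert's contribution; names and shapes here are illustrative assumptions, not the file's literal code:

import torch
import torch.nn.functional as F

batch, group, num_experts, expert_capacity = 2, 4, 4, 2

# hypothetical per-token routing results for the first chosen expert
gate_1 = torch.rand(batch, group)                                          # gate weight, [batch, group]
mask_1_flat = torch.randint(0, 2, (batch, group)).float()                  # 1 if the token fit within capacity
index_1 = torch.randint(0, num_experts, (batch, group))                    # chosen expert per token
position_in_expert_1 = torch.randint(0, expert_capacity, (batch, group))   # slot within that expert

# The combine weights multiply gate_1 by mask_1_flat right here; since the mask
# is binary, multiplying gate_1 by mask_1_flat beforehand (the deleted line)
# changes nothing and can be dropped.
combine_1 = (
    gate_1[..., None, None]
    * mask_1_flat[..., None, None]
    * F.one_hot(index_1, num_experts)[..., None].float()
    * F.one_hot(position_in_expert_1, expert_capacity)[..., None, :].float()
)   # [batch, group, num_experts, expert_capacity]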
2 changes: 1 addition & 1 deletion setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'mixture-of-experts',
   packages = find_packages(),
-  version = '0.2.1',
+  version = '0.2.2',
   license='MIT',
   description = 'Sparsely-Gated Mixture of Experts for Pytorch',
   author = 'Phil Wang',
