From 9c62045fb0df8b418aad7bb9bf9bf72b31f61e86 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Wed, 16 Oct 2024 14:08:19 -0400 Subject: [PATCH 001/126] HubSpotCellCostFunction --- .../balancer/HubSpotCellCostFunction.java | 150 ++++++++++++++++++ .../balancer/StochasticLoadBalancer.java | 7 + .../balancer/TestHubSpotCellCostFunction.java | 122 ++++++++++++++ 3 files changed, 279 insertions(+) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java new file mode 100644 index 000000000000..4b2cc516455c --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.util.HashSet; +import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * HubSpot addition: Cost function for balancing regions based on their cell prefix. This should not + * be upstreamed, and our upstream solution should instead focus on introduction of balancer + * conditionals; see HBASE-28513 + */ +public class HubSpotCellCostFunction extends CostFunction { + + private static final String HUBSPOT_CELL_COST_MULTIPLIER = + "hbase.master.balancer.stochastic.hubspotCellCost"; + private static final float DEFAULT_HUBSPOT_CELL_COST = 0; + + private int numCells; + private int numServers; + private RegionInfo[] regions; // not necessarily sorted + private int[][] regionLocations; + + HubSpotCellCostFunction(Configuration conf) { + this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); + } + + @Override + void prepare(BalancerClusterState cluster) { + numServers = cluster.numServers; + numCells = calcNumCells(cluster.regions); + regions = cluster.regions; + regionLocations = cluster.regionLocations; + super.prepare(cluster); + } + + @Override + protected double cost() { + return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); + } + + static int calculateCurrentCellCost(int numCells, int numServers, RegionInfo[] regions, + int[][] regionLocations) { + int bestCaseMaxCellsPerServer = (int) Math.ceil((double) numCells / numServers); + + int[] cellsPerServer = new int[numServers]; + Set cellsAccountedFor = new HashSet<>(numCells); + + for (int i = 0; i < regions.length; i++) { + int serverIndex = regionLocations[i][0]; + RegionInfo region = regions[i]; + Integer startCell = toCell(region.getStartKey()); + Integer stopCell = toCell(region.getEndKey()); + if (startCell == null) { + // first region. for lack of info, assume one cell + if (!cellsAccountedFor.contains(stopCell)) { + cellsAccountedFor.add(stopCell); + cellsPerServer[serverIndex] += 1; + } + } else if (stopCell == null) { + // last region. for lack of info, assume one cell + if (!cellsAccountedFor.contains(startCell)) { + cellsAccountedFor.add(startCell); + cellsPerServer[serverIndex] += 1; + } + } else { + // middle regions + for (int cell = startCell; cell <= stopCell; cell++) { + if (!cellsAccountedFor.contains(cell)) { + cellsAccountedFor.add(cell); + cellsPerServer[serverIndex] += 1; + } + } + } + } + + int currentMaxCellsPerServer = bestCaseMaxCellsPerServer; + for (int cells : cellsPerServer) { + currentMaxCellsPerServer = Math.max(currentMaxCellsPerServer, cells); + } + + return Math.max(0, currentMaxCellsPerServer - bestCaseMaxCellsPerServer); + } + + /** + * This method takes the smallest and greatest start/stop keys of all regions. From this, we can + * determine the number of two-byte cell prefixes that can exist between the start and stop keys. + * This won't work exactly correctly for the edge-case where the final region contains multiple + * cells, but it's a good enough approximation. + */ + static int calcNumCells(RegionInfo[] regionInfos) { + if (regionInfos == null || regionInfos.length == 0) { + return 0; + } + + int leastCell = Integer.MAX_VALUE; + int greatestCell = Integer.MIN_VALUE; + + for (RegionInfo regionInfo : regionInfos) { + Integer startCell = toCell(regionInfo.getStartKey()); + Integer stopCell = toCell(regionInfo.getEndKey()); + + if (startCell != null) { + if (startCell < leastCell) { + leastCell = startCell; + } + if (startCell > greatestCell) { + greatestCell = startCell; + } + } + + if (stopCell != null) { + if (stopCell < leastCell) { + leastCell = stopCell; + } + if (stopCell > greatestCell) { + greatestCell = stopCell; + } + } + } + + return greatestCell - leastCell + 1; + } + + private static Integer toCell(byte[] key) { + if (key == null || key.length < 2) { + return null; + } + return Bytes.readAsInt(key, 0, 2); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 607c5162ba47..9f8574b3d82e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -167,6 +167,9 @@ public enum GeneratorType { private RegionReplicaHostCostFunction regionReplicaHostCostFunction; private RegionReplicaRackCostFunction regionReplicaRackCostFunction; + // HubSpot addition + private HubSpotCellCostFunction cellCostFunction; + /** * Use to add balancer decision history to ring-buffer */ @@ -264,6 +267,10 @@ protected void loadConf(Configuration conf) { addCostFunction(new WriteRequestCostFunction(conf)); addCostFunction(new MemStoreSizeCostFunction(conf)); addCostFunction(new StoreFileCostFunction(conf)); + + // HubSpot addition: + addCostFunction(new HubSpotCellCostFunction(conf)); + loadCustomCostFunctions(conf); curFunctionCosts = new double[costFunctions.size()]; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java new file mode 100644 index 000000000000..31cf05eea931 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ MasterTests.class, SmallTests.class }) +public class TestHubSpotCellCostFunction { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestHubSpotCellCostFunction.class); + + @Test + public void testCellCountTypical() { + int numCells = + HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3) }); + assertEquals(4, numCells); + } + + @Test + public void testCellCountMultipleInRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 4), buildRegionInfo((short) 4, (short) 5) }); + assertEquals(6, numCells); + } + + @Test + public void testCellCountMultipleInLastRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 5) }); + assertEquals(6, numCells); + } + + @Test + public void testCellCountMultipleInFirstRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 2), buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, (short) 4), buildRegionInfo((short) 4, (short) 5) }); + assertEquals(6, numCells); + } + + @Test + public void testCellCountLastKeyNull() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }); + assertEquals(4, numCells); + } + + @Test + public void testCellCountFirstKeyNull() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 4) }); + assertEquals(4, numCells); + } + + @Test + public void testCellCountBothEndsNull() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }); + assertEquals(3, numCells); + } + + @Test + public void testCostBalanced() { + // 4 cells, 4 servers, perfectly balanced + int cost = HubSpotCellCostFunction.calculateCurrentCellCost(4, 4, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }); + assertEquals(0, cost); + } + + @Test + public void testCostImbalanced() { + // 4 cells, 4 servers, perfectly balanced + int cost = HubSpotCellCostFunction.calculateCurrentCellCost(4, 4, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }); + assertTrue(cost > 0); + } + + private RegionInfo buildRegionInfo(Short startCell, Short stopCell) { + return RegionInfoBuilder.newBuilder(TableName.valueOf("table")) + .setStartKey(startCell == null ? null : Bytes.toBytes(startCell)) + .setEndKey(stopCell == null ? null : Bytes.toBytes(stopCell)).build(); + } +} From e4f5a14b45b3e71726419359286c43776393c09c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 17 Oct 2024 09:20:06 -0400 Subject: [PATCH 002/126] Adjust to handle little endian cell encoding --- .../balancer/HubSpotCellCostFunction.java | 107 +++++++----------- 1 file changed, 42 insertions(+), 65 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 4b2cc516455c..c2d9fa8ec9aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,15 +17,20 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; /** - * HubSpot addition: Cost function for balancing regions based on their cell prefix. This should not - * be upstreamed, and our upstream solution should instead focus on introduction of balancer + * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. + * This should not be upstreamed, and our upstream solution should instead focus on introduction of balancer * conditionals; see HBASE-28513 */ public class HubSpotCellCostFunction extends CostFunction { @@ -43,8 +48,7 @@ public class HubSpotCellCostFunction extends CostFunction { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); } - @Override - void prepare(BalancerClusterState cluster) { + @Override void prepare(BalancerClusterState cluster) { numServers = cluster.numServers; numCells = calcNumCells(cluster.regions); regions = cluster.regions; @@ -52,8 +56,7 @@ void prepare(BalancerClusterState cluster) { super.prepare(cluster); } - @Override - protected double cost() { + @Override protected double cost() { return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); } @@ -62,34 +65,11 @@ static int calculateCurrentCellCost(int numCells, int numServers, RegionInfo[] r int bestCaseMaxCellsPerServer = (int) Math.ceil((double) numCells / numServers); int[] cellsPerServer = new int[numServers]; - Set cellsAccountedFor = new HashSet<>(numCells); - for (int i = 0; i < regions.length; i++) { int serverIndex = regionLocations[i][0]; RegionInfo region = regions[i]; - Integer startCell = toCell(region.getStartKey()); - Integer stopCell = toCell(region.getEndKey()); - if (startCell == null) { - // first region. for lack of info, assume one cell - if (!cellsAccountedFor.contains(stopCell)) { - cellsAccountedFor.add(stopCell); - cellsPerServer[serverIndex] += 1; - } - } else if (stopCell == null) { - // last region. for lack of info, assume one cell - if (!cellsAccountedFor.contains(startCell)) { - cellsAccountedFor.add(startCell); - cellsPerServer[serverIndex] += 1; - } - } else { - // middle regions - for (int cell = startCell; cell <= stopCell; cell++) { - if (!cellsAccountedFor.contains(cell)) { - cellsAccountedFor.add(cell); - cellsPerServer[serverIndex] += 1; - } - } - } + Set regionCells = toCells(region.getStartKey(), region.getEndKey()); + cellsPerServer[serverIndex] += regionCells.size(); } int currentMaxCellsPerServer = bestCaseMaxCellsPerServer; @@ -100,51 +80,48 @@ static int calculateCurrentCellCost(int numCells, int numServers, RegionInfo[] r return Math.max(0, currentMaxCellsPerServer - bestCaseMaxCellsPerServer); } - /** - * This method takes the smallest and greatest start/stop keys of all regions. From this, we can - * determine the number of two-byte cell prefixes that can exist between the start and stop keys. - * This won't work exactly correctly for the edge-case where the final region contains multiple - * cells, but it's a good enough approximation. - */ static int calcNumCells(RegionInfo[] regionInfos) { if (regionInfos == null || regionInfos.length == 0) { return 0; } - int leastCell = Integer.MAX_VALUE; - int greatestCell = Integer.MIN_VALUE; - - for (RegionInfo regionInfo : regionInfos) { - Integer startCell = toCell(regionInfo.getStartKey()); - Integer stopCell = toCell(regionInfo.getEndKey()); - - if (startCell != null) { - if (startCell < leastCell) { - leastCell = startCell; - } - if (startCell > greatestCell) { - greatestCell = startCell; - } - } - - if (stopCell != null) { - if (stopCell < leastCell) { - leastCell = stopCell; - } - if (stopCell > greatestCell) { - greatestCell = stopCell; - } - } + return Ints.checkedCast( + Arrays.stream(regionInfos).map(region -> toCells(region.getStartKey(), region.getEndKey())) + .flatMap(Set::stream).distinct().count()); + } + + private static Set toCells(byte[] start, byte[] stop) { + if (start == null && stop == null) { + return Collections.emptySet(); + } + + if (start == null) { + return Collections.singleton(toCell(stop)); + } + + if (stop == null) { + return Collections.singleton(toCell(start)); } - return greatestCell - leastCell + 1; + return range(start, stop); } - private static Integer toCell(byte[] key) { + private static Set range(byte[] start, byte[] stop) { + Set cells = new HashSet<>(); + + for (byte[] current = start; + Bytes.compareTo(current, stop) <= 0; current = Bytes.unsignedCopyAndIncrement(current)) { + cells.add(toCell(current)); + } + + return cells; + } + + private static Short toCell(byte[] key) { if (key == null || key.length < 2) { return null; } - return Bytes.readAsInt(key, 0, 2); - } + return ByteBuffer.wrap(key, 0, 2).order(ByteOrder.LITTLE_ENDIAN).getShort(); + } } From aad121fd64b61dead40d82a834687b36981bc385 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 17 Oct 2024 09:30:17 -0400 Subject: [PATCH 003/126] Mark as private --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index c2d9fa8ec9aa..4b40b6058b10 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -27,12 +27,14 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; +import org.apache.yetus.audience.InterfaceAudience; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. * This should not be upstreamed, and our upstream solution should instead focus on introduction of balancer * conditionals; see HBASE-28513 */ +@InterfaceAudience.Private public class HubSpotCellCostFunction extends CostFunction { private static final String HUBSPOT_CELL_COST_MULTIPLIER = From 9b5002b9539ba410ffde726ca372b906f122c93b Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 17 Oct 2024 15:16:52 -0400 Subject: [PATCH 004/126] Revert to big endian, simplify heuristics --- .../balancer/HubSpotCellCostFunction.java | 62 ++++--- .../balancer/TestHubSpotCellCostFunction.java | 157 +++++++++++------- 2 files changed, 130 insertions(+), 89 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 4b40b6058b10..5157148a0690 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,32 +17,32 @@ */ package org.apache.hadoop.hbase.master.balancer; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. * This should not be upstreamed, and our upstream solution should instead focus on introduction of balancer * conditionals; see HBASE-28513 */ -@InterfaceAudience.Private -public class HubSpotCellCostFunction extends CostFunction { +@InterfaceAudience.Private public class HubSpotCellCostFunction extends CostFunction { private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; private static final float DEFAULT_HUBSPOT_CELL_COST = 0; + // hack - hard code this for now + private static final short MAX_CELL_COUNT = 360; - private int numCells; private int numServers; + private short numCells; private RegionInfo[] regions; // not necessarily sorted private int[][] regionLocations; @@ -52,7 +52,7 @@ public class HubSpotCellCostFunction extends CostFunction { @Override void prepare(BalancerClusterState cluster) { numServers = cluster.numServers; - numCells = calcNumCells(cluster.regions); + numCells = calcNumCells(cluster.regions, MAX_CELL_COUNT); regions = cluster.regions; regionLocations = cluster.regionLocations; super.prepare(cluster); @@ -62,61 +62,57 @@ public class HubSpotCellCostFunction extends CostFunction { return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); } - static int calculateCurrentCellCost(int numCells, int numServers, RegionInfo[] regions, + static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions, int[][] regionLocations) { - int bestCaseMaxCellsPerServer = (int) Math.ceil((double) numCells / numServers); + int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); int[] cellsPerServer = new int[numServers]; for (int i = 0; i < regions.length; i++) { int serverIndex = regionLocations[i][0]; RegionInfo region = regions[i]; - Set regionCells = toCells(region.getStartKey(), region.getEndKey()); + Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); cellsPerServer[serverIndex] += regionCells.size(); } - int currentMaxCellsPerServer = bestCaseMaxCellsPerServer; - for (int cells : cellsPerServer) { - currentMaxCellsPerServer = Math.max(currentMaxCellsPerServer, cells); - } + int currentMaxCellsPerServer = + Arrays.stream(cellsPerServer).max().orElseGet(() -> bestCaseMaxCellsPerServer); return Math.max(0, currentMaxCellsPerServer - bestCaseMaxCellsPerServer); } - static int calcNumCells(RegionInfo[] regionInfos) { + static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { if (regionInfos == null || regionInfos.length == 0) { return 0; } - return Ints.checkedCast( - Arrays.stream(regionInfos).map(region -> toCells(region.getStartKey(), region.getEndKey())) - .flatMap(Set::stream).distinct().count()); + Set cellsInRegions = + Arrays.stream(regionInfos).map(region -> toCells(region.getStartKey(), region.getEndKey(), totalCellCount)) + .flatMap(Set::stream).collect(Collectors.toSet()); + return Shorts.checkedCast(cellsInRegions.size()); } - private static Set toCells(byte[] start, byte[] stop) { + private static Set toCells(byte[] start, byte[] stop, short numCells) { if (start == null && stop == null) { return Collections.emptySet(); } - if (start == null) { - return Collections.singleton(toCell(stop)); + if (stop == null || stop.length == 0) { + Set result = IntStream.range(toCell(start), numCells).mapToObj(x -> (short) x) + .collect(Collectors.toSet()); + return result; } - if (stop == null) { - return Collections.singleton(toCell(start)); + if (start == null || start.length == 0) { + return IntStream.range(0, toCell(stop)).mapToObj(x -> (short) x) + .collect(Collectors.toSet()); } return range(start, stop); } private static Set range(byte[] start, byte[] stop) { - Set cells = new HashSet<>(); - - for (byte[] current = start; - Bytes.compareTo(current, stop) <= 0; current = Bytes.unsignedCopyAndIncrement(current)) { - cells.add(toCell(current)); - } - - return cells; + return IntStream.range(toCell(start), toCell(stop)).mapToObj(val -> (short) val) + .collect(Collectors.toSet()); } private static Short toCell(byte[] key) { @@ -124,6 +120,6 @@ private static Short toCell(byte[] key) { return null; } - return ByteBuffer.wrap(key, 0, 2).order(ByteOrder.LITTLE_ENDIAN).getShort(); + return Bytes.toShort(key, 0, 2); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index 31cf05eea931..7ef0e5b5d2b1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -19,7 +19,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; - import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -31,92 +30,138 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -@Category({ MasterTests.class, SmallTests.class }) -public class TestHubSpotCellCostFunction { +@Category({ MasterTests.class, SmallTests.class }) public class TestHubSpotCellCostFunction { - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = + @ClassRule public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHubSpotCellCostFunction.class); - @Test - public void testCellCountTypical() { - int numCells = - HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), - buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3) }); - assertEquals(4, numCells); + @Test public void testCellCountTypical() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3) + }, + (short) 3 + ); + assertEquals(3, numCells); } - @Test - public void testCellCountMultipleInRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 4), buildRegionInfo((short) 4, (short) 5) }); - assertEquals(6, numCells); + @Test public void testCellCountMultipleInRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 4), + buildRegionInfo((short) 4, (short) 5) + }, + (short) 5); + assertEquals(5, numCells); } - @Test - public void testCellCountMultipleInLastRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 5) }); - assertEquals(6, numCells); + @Test public void testCellCountMultipleInLastRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, (short) 5) + }, + (short) 5); + assertEquals(5, numCells); } - @Test - public void testCellCountMultipleInFirstRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { - buildRegionInfo((short) 0, (short) 2), buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, (short) 4), buildRegionInfo((short) 4, (short) 5) }); - assertEquals(6, numCells); + @Test public void testCellCountMultipleInFirstRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { + buildRegionInfo((short) 0, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, (short) 4), + buildRegionInfo((short) 4, (short) 5) + }, + (short) 5); + assertEquals(5, numCells); } - @Test - public void testCellCountLastKeyNull() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }); + @Test public void testCellCountLastKeyNull() { + int numCells = HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + (short) 4); assertEquals(4, numCells); } - @Test - public void testCellCountFirstKeyNull() { + @Test public void testCellCountFirstKeyNull() { int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 4) }); + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, (short) 4) + }, + (short) 4); assertEquals(4, numCells); } - @Test - public void testCellCountBothEndsNull() { + @Test public void testCellCountBothEndsNull() { int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }); - assertEquals(3, numCells); + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + (short) 4); + assertEquals(4, numCells); } - @Test - public void testCostBalanced() { + @Test public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost(4, 4, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost( + (short) 4, + 4, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[][] { + { 0 }, + { 1 }, + { 2 }, + { 3 } } + ); + assertEquals(0, cost); } - @Test - public void testCostImbalanced() { + @Test public void testCostImbalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost(4, 4, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[][] { + { 0 }, + { 0 }, + { 0 }, + { 0 } + }); assertTrue(cost > 0); } private RegionInfo buildRegionInfo(Short startCell, Short stopCell) { - return RegionInfoBuilder.newBuilder(TableName.valueOf("table")) + RegionInfo result = RegionInfoBuilder.newBuilder(TableName.valueOf("table")) .setStartKey(startCell == null ? null : Bytes.toBytes(startCell)) .setEndKey(stopCell == null ? null : Bytes.toBytes(stopCell)).build(); + return result; } } From 9a954dda07a34e7956743e5d6bea90b4b4e1b3b1 Mon Sep 17 00:00:00 2001 From: Ray Mattingly Date: Tue, 29 Oct 2024 18:02:39 -0400 Subject: [PATCH 005/126] Fix NPE, add logging, run spotless --- .../balancer/HubSpotCellCostFunction.java | 41 +++-- .../balancer/TestHubSpotCellCostFunction.java | 146 +++++++----------- 2 files changed, 80 insertions(+), 107 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 5157148a0690..2d9a9e8219d5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -26,15 +26,21 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; /** - * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. - * This should not be upstreamed, and our upstream solution should instead focus on introduction of balancer - * conditionals; see HBASE-28513 + * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This + * should not be upstreamed, and our upstream solution should instead focus on introduction of + * balancer conditionals; see + * HBASE-28513 */ -@InterfaceAudience.Private public class HubSpotCellCostFunction extends CostFunction { +@InterfaceAudience.Private +public class HubSpotCellCostFunction extends CostFunction { + private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; private static final float DEFAULT_HUBSPOT_CELL_COST = 0; @@ -50,7 +56,8 @@ this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); } - @Override void prepare(BalancerClusterState cluster) { + @Override + void prepare(BalancerClusterState cluster) { numServers = cluster.numServers; numCells = calcNumCells(cluster.regions, MAX_CELL_COUNT); regions = cluster.regions; @@ -58,7 +65,8 @@ super.prepare(cluster); } - @Override protected double cost() { + @Override + protected double cost() { return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); } @@ -71,9 +79,14 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int serverIndex = regionLocations[i][0]; RegionInfo region = regions[i]; Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); + LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); cellsPerServer[serverIndex] += regionCells.size(); } + for (int i = 0; i < numServers; i++) { + LOG.info("Server {} has {} cells", i, cellsPerServer[i]); + } + int currentMaxCellsPerServer = Arrays.stream(cellsPerServer).max().orElseGet(() -> bestCaseMaxCellsPerServer); @@ -85,9 +98,9 @@ static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { return 0; } - Set cellsInRegions = - Arrays.stream(regionInfos).map(region -> toCells(region.getStartKey(), region.getEndKey(), totalCellCount)) - .flatMap(Set::stream).collect(Collectors.toSet()); + Set cellsInRegions = Arrays.stream(regionInfos) + .map(region -> toCells(region.getStartKey(), region.getEndKey(), totalCellCount)) + .flatMap(Set::stream).collect(Collectors.toSet()); return Shorts.checkedCast(cellsInRegions.size()); } @@ -96,15 +109,13 @@ private static Set toCells(byte[] start, byte[] stop, short numCells) { return Collections.emptySet(); } - if (stop == null || stop.length == 0) { - Set result = IntStream.range(toCell(start), numCells).mapToObj(x -> (short) x) + if (start != null && (stop == null || stop.length == 0)) { + return IntStream.range(toCell(start), numCells).mapToObj(x -> (short) x) .collect(Collectors.toSet()); - return result; } - if (start == null || start.length == 0) { - return IntStream.range(0, toCell(stop)).mapToObj(x -> (short) x) - .collect(Collectors.toSet()); + if (stop != null && (start == null || start.length == 0)) { + return IntStream.range(0, toCell(stop)).mapToObj(x -> (short) x).collect(Collectors.toSet()); } return range(start, stop); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index 7ef0e5b5d2b1..f15d93179312 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -19,6 +19,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; + import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -30,131 +31,92 @@ import org.junit.Test; import org.junit.experimental.categories.Category; -@Category({ MasterTests.class, SmallTests.class }) public class TestHubSpotCellCostFunction { +@Category({ MasterTests.class, SmallTests.class }) +public class TestHubSpotCellCostFunction { - @ClassRule public static final HBaseClassTestRule CLASS_RULE = + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHubSpotCellCostFunction.class); - @Test public void testCellCountTypical() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3) - }, - (short) 3 - ); + @Test + public void testCellCountTypical() { + int numCells = + HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), + buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3) }, + (short) 3); assertEquals(3, numCells); } - @Test public void testCellCountMultipleInRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 4), - buildRegionInfo((short) 4, (short) 5) - }, - (short) 5); + @Test + public void testCellCountMultipleInRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 4), buildRegionInfo((short) 4, (short) 5) }, (short) 5); assertEquals(5, numCells); } - @Test public void testCellCountMultipleInLastRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, (short) 5) - }, - (short) 5); + @Test + public void testCellCountMultipleInLastRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 5) }, (short) 5); assertEquals(5, numCells); } - @Test public void testCellCountMultipleInFirstRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo((short) 0, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, (short) 4), - buildRegionInfo((short) 4, (short) 5) - }, - (short) 5); + @Test + public void testCellCountMultipleInFirstRegion() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 2), buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, (short) 4), buildRegionInfo((short) 4, (short) 5) }, (short) 5); assertEquals(5, numCells); } - @Test public void testCellCountLastKeyNull() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo((short) 0, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, - (short) 4); + @Test + public void testCellCountLastKeyNull() { + int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, (short) 4); assertEquals(4, numCells); } - @Test public void testCellCountFirstKeyNull() { - int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, (short) 4) - }, - (short) 4); + @Test + public void testCellCountFirstKeyNull() { + int numCells = + HubSpotCellCostFunction.calcNumCells( + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 4) }, + (short) 4); assertEquals(4, numCells); } - @Test public void testCellCountBothEndsNull() { + @Test + public void testCellCountBothEndsNull() { int numCells = HubSpotCellCostFunction.calcNumCells( - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, (short) 4); assertEquals(4, numCells); } - @Test public void testCostBalanced() { + @Test + public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost( - (short) 4, - 4, - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, - new int[][] { - { 0 }, - { 1 }, - { 2 }, - { 3 } } - ); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }); assertEquals(0, cost); } - @Test public void testCostImbalanced() { + @Test + public void testCostImbalanced() { // 4 cells, 4 servers, perfectly balanced int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, - new int[][] { - { 0 }, - { 0 }, - { 0 }, - { 0 } - }); + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }); assertTrue(cost > 0); } From 6271c2692cf5eac25333fadde6eb351a5eb65916 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 29 Oct 2024 18:35:33 -0400 Subject: [PATCH 006/126] Clean up --- .../balancer/HubSpotCellCostFunction.java | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 2d9a9e8219d5..6228fb882a33 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -104,31 +104,35 @@ static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { return Shorts.checkedCast(cellsInRegions.size()); } - private static Set toCells(byte[] start, byte[] stop, short numCells) { - if (start == null && stop == null) { - return Collections.emptySet(); - } + private static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { + return range(padToTwoBytes(rawStart, (byte) 0), padToTwoBytes(rawStop, (byte) -1), numCells); + } - if (start != null && (stop == null || stop.length == 0)) { - return IntStream.range(toCell(start), numCells).mapToObj(x -> (short) x) - .collect(Collectors.toSet()); + private static byte[] padToTwoBytes(byte[] key, byte pad) { + if (key == null || key.length == 0) { + return new byte[] { pad, pad }; } - if (stop != null && (start == null || start.length == 0)) { - return IntStream.range(0, toCell(stop)).mapToObj(x -> (short) x).collect(Collectors.toSet()); + if (key.length == 1) { + return new byte[] { pad, key[0]}; } - return range(start, stop); + return key; } - private static Set range(byte[] start, byte[] stop) { - return IntStream.range(toCell(start), toCell(stop)).mapToObj(val -> (short) val) - .collect(Collectors.toSet()); + private static Set range(byte[] start, byte[] stop, short numCells) { + short stopCellId = toCell(stop); + if (stopCellId < 0 || stopCellId > numCells) { + stopCellId = numCells; + } + return IntStream.range(toCell(start), stopCellId) + .mapToObj(val -> (short) val).collect(Collectors.toSet()); } - private static Short toCell(byte[] key) { + private static short toCell(byte[] key) { if (key == null || key.length < 2) { - return null; + throw new IllegalArgumentException( + "Key must be at least 2 bytes long - passed " + Bytes.toHex(key)); } return Bytes.toShort(key, 0, 2); From 995b8cbd8336c83e66cb4cf18ed509226fddb1d9 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 30 Oct 2024 09:16:10 -0400 Subject: [PATCH 007/126] Add init debug --- .../balancer/HubSpotCellCostFunction.java | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 6228fb882a33..82af169a3b4e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.Arrays; -import java.util.Collections; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -56,17 +55,43 @@ public class HubSpotCellCostFunction extends CostFunction { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); } - @Override - void prepare(BalancerClusterState cluster) { + @Override void prepare(BalancerClusterState cluster) { numServers = cluster.numServers; numCells = calcNumCells(cluster.regions, MAX_CELL_COUNT); regions = cluster.regions; regionLocations = cluster.regionLocations; super.prepare(cluster); + + if (LOG.isDebugEnabled()) { + StringBuilder initString = new StringBuilder(); + + initString.append("Initializing HubSpotCellCostFunction:\n\t") + .append("numServers=").append(numServers) + .append("\n\tnumCells=").append(numCells) + .append("\n\tregions=\n").append(stringifyRegions(regions)) + .append("\n\tregionLocations=\n").append(Arrays.deepToString(regionLocations)); + + LOG.debug("{}", initString); + } + } + + private static String stringifyRegions(RegionInfo[] regions) { + return "[\n\t" + + Arrays.stream(regions) + .map(info -> + String.format( + "%s [%s, %s)", + info.getRegionNameAsString(), + Bytes.toHex(info.getStartKey()), + Bytes.toHex(info.getEndKey()) + ) + ) + .collect(Collectors.joining("\n\t")) + + "\n]"; + } - @Override - protected double cost() { + @Override protected double cost() { return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); } From d94d8626b25ffc1d8989840081885d2d74159953 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 30 Oct 2024 09:21:01 -0400 Subject: [PATCH 008/126] Clarify expectations via preconditions --- .../balancer/HubSpotCellCostFunction.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 82af169a3b4e..092ed70a88e8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -24,6 +24,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,17 +79,18 @@ public class HubSpotCellCostFunction extends CostFunction { private static String stringifyRegions(RegionInfo[] regions) { return "[\n\t" + Arrays.stream(regions) - .map(info -> - String.format( - "%s [%s, %s)", - info.getRegionNameAsString(), - Bytes.toHex(info.getStartKey()), - Bytes.toHex(info.getEndKey()) - ) - ) + .map(HubSpotCellCostFunction::stringifyRegion ) .collect(Collectors.joining("\n\t")) + "\n]"; + } + private static String stringifyRegion(RegionInfo info) { + return String.format( + "%s [%s, %s)", + info.getRegionNameAsString(), + Bytes.toHex(info.getStartKey()), + Bytes.toHex(info.getEndKey()) + ); } @Override protected double cost() { @@ -98,9 +100,12 @@ private static String stringifyRegions(RegionInfo[] regions) { static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions, int[][] regionLocations) { int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); + Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); int[] cellsPerServer = new int[numServers]; for (int i = 0; i < regions.length; i++) { + Preconditions.checkNotNull(regions[i], "No region available at index " + i); + Preconditions.checkNotNull(regionLocations[i], "No region location available for " + stringifyRegion(regions[i])); int serverIndex = regionLocations[i][0]; RegionInfo region = regions[i]; Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); @@ -155,10 +160,7 @@ private static Set range(byte[] start, byte[] stop, short numCells) { } private static short toCell(byte[] key) { - if (key == null || key.length < 2) { - throw new IllegalArgumentException( - "Key must be at least 2 bytes long - passed " + Bytes.toHex(key)); - } + Preconditions.checkArgument(key != null && key.length >= 2, "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); return Bytes.toShort(key, 0, 2); } From 8202674f614e0af5b27a8dfae2106f479b689702 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 30 Oct 2024 11:03:22 -0400 Subject: [PATCH 009/126] Update debug and add guard for non default tables --- .../master/balancer/HubSpotCellCostFunction.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 092ed70a88e8..f202354b03aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -22,6 +22,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; @@ -102,6 +103,18 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); + if (LOG.isDebugEnabled()) { + Set tableAndNamespace = Arrays.stream(regions).map(RegionInfo::getTable) + .map(table -> table.getNameAsString() + "." + table.getNamespaceAsString()) + .collect(Collectors.toSet()); + LOG.debug("Calculating current cell cost for {} regions from these tables {}", regions.length, tableAndNamespace); + } + + if (regions.length > 0 && !regions[0].getTable().getNamespaceAsString().equals("default")) { + LOG.info("Skipping cost calculation for non-default namespace on {}", regions[0].getTable().getNameWithNamespaceInclAsString()); + return 0; + } + int[] cellsPerServer = new int[numServers]; for (int i = 0; i < regions.length; i++) { Preconditions.checkNotNull(regions[i], "No region available at index " + i); From f83dc2ecac00fa6b8da10a1a79d1c4e0c2eccb3d Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 30 Oct 2024 11:56:06 -0400 Subject: [PATCH 010/126] Emit setup at info level to ensure we see it --- .../balancer/HubSpotCellCostFunction.java | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index f202354b03aa..87c14ee9a94a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -22,14 +22,12 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - +import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; /** @@ -64,17 +62,14 @@ public class HubSpotCellCostFunction extends CostFunction { regionLocations = cluster.regionLocations; super.prepare(cluster); - if (LOG.isDebugEnabled()) { - StringBuilder initString = new StringBuilder(); + StringBuilder initString = new StringBuilder(); - initString.append("Initializing HubSpotCellCostFunction:\n\t") - .append("numServers=").append(numServers) - .append("\n\tnumCells=").append(numCells) - .append("\n\tregions=\n").append(stringifyRegions(regions)) - .append("\n\tregionLocations=\n").append(Arrays.deepToString(regionLocations)); + initString.append("Initializing HubSpotCellCostFunction:\n\t").append("numServers=") + .append(numServers).append("\n\tnumCells=").append(numCells).append("\n\tregions=\n") + .append(stringifyRegions(regions)).append("\n\tregionLocations=\n") + .append(Arrays.deepToString(regionLocations)); - LOG.debug("{}", initString); - } + LOG.info("{}", initString); } private static String stringifyRegions(RegionInfo[] regions) { From 8c6c48cc51312c5c18c3013a6fdd5c427db9a529 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 30 Oct 2024 14:33:52 -0400 Subject: [PATCH 011/126] Add info state dump on every cost calc call --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 87c14ee9a94a..13a1467784c3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -90,6 +90,16 @@ private static String stringifyRegion(RegionInfo info) { } @Override protected double cost() { + if (regions != null && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default")) { + StringBuilder stateString = new StringBuilder(); + stateString.append("Calculating cost for HubSpotCellCostFunction against default namespace:\n\t").append("numServers=") + .append(numServers).append("\n\tnumCells=").append(numCells).append("\n\tregions=\n") + .append(stringifyRegions(regions)).append("\n\tregionLocations=\n") + .append(Arrays.deepToString(regionLocations)); + + LOG.info("{}", stateString); + } + return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); } From ab52ea6e1c8da099d203b00d6084286d23876c93 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 11:36:10 -0400 Subject: [PATCH 012/126] Add some debug so we can see why regionlocation would be null --- .../hadoop/hbase/master/balancer/BalancerClusterState.java | 3 +++ .../hadoop/hbase/master/balancer/RegionLocationFinder.java | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 4a9bdfee708a..8507a13a8128 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -407,6 +407,7 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, if (regionFinder != null) { // region location List loc = regionFinder.getTopBlockLocations(region); + LOG.debug("{} is located on {} server", region.getRegionNameAsString(), loc.size()); regionLocations[regionIndex] = new int[loc.size()]; for (int i = 0; i < loc.size(); i++) { regionLocations[regionIndex][i] = loc.get(i) == null @@ -415,6 +416,8 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, ? -1 : serversToIndex.get(loc.get(i).getAddress())); } + } else { + LOG.warn("Region finder is null, not registering region {}", region.getRegionNameAsString()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index ab873380268d..85b15599e580 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -245,11 +245,15 @@ protected List getTopBlockLocations(RegionInfo region, String curren * @return ordered list of hosts holding blocks of the specified region */ protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) { + String regionNameAsString = region.getRegionNameAsString(); + LOG.debug("Fetching top block locations for {}", regionNameAsString); try { TableDescriptor tableDescriptor = getTableDescriptor(region.getTable()); if (tableDescriptor != null) { + LOG.debug("Region {} is located on {}", regionNameAsString, tableDescriptor.getTableName().getNameAsString()); HDFSBlocksDistribution blocksDistribution = HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region); + LOG.debug("Top hosts for region {}: {}", regionNameAsString, blocksDistribution.getTopHosts()); return blocksDistribution; } } catch (IOException ioe) { From 0ac73bbab81783970af71f932408f4f589d62e3b Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 11:39:13 -0400 Subject: [PATCH 013/126] emit if we disable locationfinder --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 9f8574b3d82e..6adf33d1dffc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -501,6 +501,7 @@ protected List balanceTable(TableName tableName, (this.localityCost != null && this.localityCost.getMultiplier() > 0) || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0) ) { + LOG.debug("Didn't detect a need for region finder, disabling"); finder = this.regionFinder; } From 275ba6bbf76660d0448733aa90375b63ba1b0e92 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 11:44:53 -0400 Subject: [PATCH 014/126] Ensure the region finder is set if the cell cost function exists --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 6adf33d1dffc..e321004cbac9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -269,7 +269,8 @@ protected void loadConf(Configuration conf) { addCostFunction(new StoreFileCostFunction(conf)); // HubSpot addition: - addCostFunction(new HubSpotCellCostFunction(conf)); + cellCostFunction = new HubSpotCellCostFunction(conf); + addCostFunction(cellCostFunction); loadCustomCostFunctions(conf); @@ -315,9 +316,11 @@ public synchronized void updateClusterMetrics(ClusterMetrics st) { private void updateBalancerTableLoadInfo(TableName tableName, Map> loadOfOneTable) { RegionLocationFinder finder = null; + // HubSpot addition: if ( (this.localityCost != null && this.localityCost.getMultiplier() > 0) || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0) + || (this.cellCostFunction != null && this.cellCostFunction.getMultiplier() > 0) ) { finder = this.regionFinder; } From 1f58743f831a4591444561ab163d6fdfe5d2d2e8 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 12:30:27 -0400 Subject: [PATCH 015/126] Emit the multiplier --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 13a1467784c3..b3867fe43675 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -64,10 +64,12 @@ public class HubSpotCellCostFunction extends CostFunction { StringBuilder initString = new StringBuilder(); - initString.append("Initializing HubSpotCellCostFunction:\n\t").append("numServers=") - .append(numServers).append("\n\tnumCells=").append(numCells).append("\n\tregions=\n") - .append(stringifyRegions(regions)).append("\n\tregionLocations=\n") - .append(Arrays.deepToString(regionLocations)); + initString.append("Initializing HubSpotCellCostFunction:") + .append("\n\tnumServers=").append(numServers) + .append("\n\tnumCells=").append(numCells) + .append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) + .append("\n\tregions=\n").append(stringifyRegions(regions)) + .append("\n\tregionLocations=\n").append(Arrays.deepToString(regionLocations)); LOG.info("{}", initString); } From 57205da5d3ba08811cc880b82d3959ca29431fcd Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 12:41:58 -0400 Subject: [PATCH 016/126] Missed one spot --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index e321004cbac9..2a28715d6414 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -500,9 +500,11 @@ protected List balanceTable(TableName tableName, // Allow turning this feature off if the locality cost is not going to // be used in any computations. RegionLocationFinder finder = null; + // HubSpot addition: if ( (this.localityCost != null && this.localityCost.getMultiplier() > 0) || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0) + || (this.cellCostFunction != null && this.cellCostFunction.getMultiplier() > 0) ) { LOG.debug("Didn't detect a need for region finder, disabling"); finder = this.regionFinder; From a9e15470bee42e798b9697ae7550b045b57cb4f8 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 12:52:38 -0400 Subject: [PATCH 017/126] Fix debug --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 2a28715d6414..70e50826759f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -506,8 +506,9 @@ protected List balanceTable(TableName tableName, || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0) || (this.cellCostFunction != null && this.cellCostFunction.getMultiplier() > 0) ) { - LOG.debug("Didn't detect a need for region finder, disabling"); finder = this.regionFinder; + } else { + LOG.debug("Didn't detect a need for region finder, disabling"); } // The clusterState that is given to this method contains the state From b59c17c2973be74f7dfa0dd216983c46556870d3 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 14:01:57 -0400 Subject: [PATCH 018/126] skip any that snuck in, emit better logs, and fail more obviously here --- .../balancer/HubSpotCellCostFunction.java | 71 +++++++++++-------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index b3867fe43675..6f2781822b9e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -18,10 +18,12 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.Arrays; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; @@ -48,6 +50,7 @@ public class HubSpotCellCostFunction extends CostFunction { private int numServers; private short numCells; + private ServerName[] servers; private RegionInfo[] regions; // not necessarily sorted private int[][] regionLocations; @@ -60,46 +63,47 @@ public class HubSpotCellCostFunction extends CostFunction { numCells = calcNumCells(cluster.regions, MAX_CELL_COUNT); regions = cluster.regions; regionLocations = cluster.regionLocations; + servers = cluster.servers; super.prepare(cluster); + LOG.info("Initializing {}", snapshotState()); + } + + private String snapshotState() { StringBuilder initString = new StringBuilder(); - initString.append("Initializing HubSpotCellCostFunction:") + initString.append("HubSpotCellCostFunction config:") .append("\n\tnumServers=").append(numServers) .append("\n\tnumCells=").append(numCells) .append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) - .append("\n\tregions=\n").append(stringifyRegions(regions)) - .append("\n\tregionLocations=\n").append(Arrays.deepToString(regionLocations)); - - LOG.info("{}", initString); - } + .append("\n\tregions=\n"); - private static String stringifyRegions(RegionInfo[] regions) { - return "[\n\t" + - Arrays.stream(regions) - .map(HubSpotCellCostFunction::stringifyRegion ) - .collect(Collectors.joining("\n\t")) + - "\n]"; - } + for (int i = 0; i < regions.length; i++) { + RegionInfo region = regions[i]; + int[] regionLocations = this.regionLocations[i]; + Optional highestLocalityServerMaybe = + Optional.ofNullable(regionLocations).filter(locations -> locations.length > 0) + .map(locations -> locations[0]).map(serverIndex -> this.servers[serverIndex]); + int assignedServers = Optional.ofNullable(regionLocations).map(locations -> locations.length) + .orElseGet(() -> 0); + + initString.append("\t") + .append(region.getShortNameToLog()) + .append("[") + .append(Bytes.toHex(region.getStartKey())) + .append(", ") + .append(Bytes.toHex(region.getEndKey())) + .append(") -> ") + .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) + .append( "(with ").append(assignedServers).append(" total candidates)"); + } - private static String stringifyRegion(RegionInfo info) { - return String.format( - "%s [%s, %s)", - info.getRegionNameAsString(), - Bytes.toHex(info.getStartKey()), - Bytes.toHex(info.getEndKey()) - ); + return initString.toString(); } @Override protected double cost() { if (regions != null && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default")) { - StringBuilder stateString = new StringBuilder(); - stateString.append("Calculating cost for HubSpotCellCostFunction against default namespace:\n\t").append("numServers=") - .append(numServers).append("\n\tnumCells=").append(numCells).append("\n\tregions=\n") - .append(stringifyRegions(regions)).append("\n\tregionLocations=\n") - .append(Arrays.deepToString(regionLocations)); - - LOG.info("{}", stateString); + LOG.info("Evaluating {}", snapshotState()); } return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); @@ -124,10 +128,17 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int[] cellsPerServer = new int[numServers]; for (int i = 0; i < regions.length; i++) { - Preconditions.checkNotNull(regions[i], "No region available at index " + i); - Preconditions.checkNotNull(regionLocations[i], "No region location available for " + stringifyRegion(regions[i])); - int serverIndex = regionLocations[i][0]; RegionInfo region = regions[i]; + Preconditions.checkNotNull(region, "No region available at index " + i); + if (!region.getTable().getNamespaceAsString().equals("default")) { + continue; + } + + int[] serverListForRegion = regionLocations[i]; + Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); + Preconditions.checkState(serverListForRegion.length > 0, "No servers available for " + region.getShortNameToLog()); + + int serverIndex = serverListForRegion[0]; Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); cellsPerServer[serverIndex] += regionCells.size(); From 840496d5b73177dec6a183190921cd938c11bce2 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 14:04:11 -0400 Subject: [PATCH 019/126] include count w/o servers --- .../balancer/HubSpotCellCostFunction.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 6f2781822b9e..05a1b51c961d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -70,13 +70,16 @@ public class HubSpotCellCostFunction extends CostFunction { } private String snapshotState() { - StringBuilder initString = new StringBuilder(); + StringBuilder stateString = new StringBuilder(); - initString.append("HubSpotCellCostFunction config:") + stateString.append("HubSpotCellCostFunction config:") .append("\n\tnumServers=").append(numServers) .append("\n\tnumCells=").append(numCells) .append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) - .append("\n\tregions=\n"); + .append("\n\tregions=\n["); + + int numAssigned = 0; + int numUnassigned = 0; for (int i = 0; i < regions.length; i++) { RegionInfo region = regions[i]; @@ -87,7 +90,13 @@ private String snapshotState() { int assignedServers = Optional.ofNullable(regionLocations).map(locations -> locations.length) .orElseGet(() -> 0); - initString.append("\t") + if (assignedServers > 0) { + numAssigned++; + } else { + numUnassigned++; + } + + stateString.append("\t") .append(region.getShortNameToLog()) .append("[") .append(Bytes.toHex(region.getStartKey())) @@ -98,7 +107,9 @@ private String snapshotState() { .append( "(with ").append(assignedServers).append(" total candidates)"); } - return initString.toString(); + stateString.append("\n]\n\n\tAssigned regions: ").append(numAssigned) + .append("\n\tUnassigned regions: ").append(numUnassigned).append("\n"); + return stateString.toString(); } @Override protected double cost() { From d7081eb10ceb679c4a5fc5872643419498ccacc3 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 14:47:58 -0400 Subject: [PATCH 020/126] Make it legible --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 05a1b51c961d..fce90a05c9a1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -96,7 +96,7 @@ private String snapshotState() { numUnassigned++; } - stateString.append("\t") + stateString.append("\n\t") .append(region.getShortNameToLog()) .append("[") .append(Bytes.toHex(region.getStartKey())) From 856e440966e762d0ffb6dd5c4407bb2951a02375 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 14:49:23 -0400 Subject: [PATCH 021/126] list details of the unknown region --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index fce90a05c9a1..08b459fc0d6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -147,7 +147,7 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int[] serverListForRegion = regionLocations[i]; Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); - Preconditions.checkState(serverListForRegion.length > 0, "No servers available for " + region.getShortNameToLog()); + Preconditions.checkState(serverListForRegion.length > 0, "No servers available for [%s] %s.%s", region.getTable().getNamespaceAsString(), region.getTable().getNameAsString(),region.getShortNameToLog()); int serverIndex = serverListForRegion[0]; Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); From 7df5fc24308b85f56dae63053b606e484d8f3cc8 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 14:53:48 -0400 Subject: [PATCH 022/126] Emit which table --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 08b459fc0d6a..969f7f9ebfd7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -24,6 +24,7 @@ import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; @@ -72,7 +73,13 @@ public class HubSpotCellCostFunction extends CostFunction { private String snapshotState() { StringBuilder stateString = new StringBuilder(); - stateString.append("HubSpotCellCostFunction config:") + stateString.append("HubSpotCellCostFunction config for ") + .append( + Optional.ofNullable(regions[0]) + .map(RegionInfo::getTable) + .map(TableName::getNameWithNamespaceInclAsString) + .orElseGet(() -> "N/A") + ).append(":") .append("\n\tnumServers=").append(numServers) .append("\n\tnumCells=").append(numCells) .append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) From a1d849f436a50b55aa05be61587c6e78b8f3b1f7 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 15:13:41 -0400 Subject: [PATCH 023/126] Skip if empty region server mapping, assume it's empty for now --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 969f7f9ebfd7..b3865c60b152 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -154,7 +154,12 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int[] serverListForRegion = regionLocations[i]; Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); - Preconditions.checkState(serverListForRegion.length > 0, "No servers available for [%s] %s.%s", region.getTable().getNamespaceAsString(), region.getTable().getNameAsString(),region.getShortNameToLog()); + + if (serverListForRegion.length == 0) { + LOG.warn("{}: no servers available, this may be an empty region", + region.getShortNameToLog()); + continue; + } int serverIndex = serverListForRegion[0]; Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); From 45c8182aec68741faa9c5884aeecb629c690dfac Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 16:34:01 -0400 Subject: [PATCH 024/126] Emit the cells in the region here --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index b3865c60b152..6169ebe3981f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -103,13 +103,18 @@ private String snapshotState() { numUnassigned++; } + String cellsInRegion = toCells(region.getStartKey(), region.getEndKey(), numCells).stream() + .map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); + stateString.append("\n\t") .append(region.getShortNameToLog()) .append("[") .append(Bytes.toHex(region.getStartKey())) .append(", ") .append(Bytes.toHex(region.getEndKey())) - .append(") -> ") + .append(") ") + .append(cellsInRegion) + .append(" -> ") .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) .append( "(with ").append(assignedServers).append(" total candidates)"); } From 1a67f81bca138804529399eadc463383a13547e9 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 16:35:52 -0400 Subject: [PATCH 025/126] Tell us about which cells this region holds --- .../balancer/HubSpotCellCostFunction.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 6169ebe3981f..bcb321bfcf3b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -103,8 +103,7 @@ private String snapshotState() { numUnassigned++; } - String cellsInRegion = toCells(region.getStartKey(), region.getEndKey(), numCells).stream() - .map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); + String cellsInRegion = toCellSetString(toCells(region.getStartKey(), region.getEndKey(), numCells)); stateString.append("\n\t") .append(region.getShortNameToLog()) @@ -124,6 +123,11 @@ private String snapshotState() { return stateString.toString(); } + private static String toCellSetString(Set cells) { + return cells.stream() + .map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); + } + @Override protected double cost() { if (regions != null && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default")) { LOG.info("Evaluating {}", snapshotState()); @@ -160,15 +164,16 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] int[] serverListForRegion = regionLocations[i]; Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); + Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); + LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); + if (serverListForRegion.length == 0) { - LOG.warn("{}: no servers available, this may be an empty region", - region.getShortNameToLog()); + LOG.warn("{} {}: no servers available, this may be an empty region", + region.getShortNameToLog(), toCellSetString(regionCells)); continue; } int serverIndex = serverListForRegion[0]; - Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); - LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); cellsPerServer[serverIndex] += regionCells.size(); } From 5328064647f6c98cc492fe0fd429a66da83a6a2c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 16:48:14 -0400 Subject: [PATCH 026/126] Add emission for region size --- .../balancer/HubSpotCellCostFunction.java | 18 +++++++++++++----- .../balancer/TestHubSpotCellCostFunction.java | 7 +++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index bcb321bfcf3b..13f7117cec1c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -20,6 +20,7 @@ import java.util.Arrays; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; @@ -133,11 +134,16 @@ private static String toCellSetString(Set cells) { LOG.info("Evaluating {}", snapshotState()); } - return calculateCurrentCellCost(numCells, numServers, regions, regionLocations); + return calculateCurrentCellCost(numCells, numServers, regions, regionLocations, super.cluster::getRegionSizeMB); } - static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions, - int[][] regionLocations) { + static int calculateCurrentCellCost( + short numCells, + int numServers, + RegionInfo[] regions, + int[][] regionLocations, + Function getRegionSizeMbFunc + ) { int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); @@ -168,8 +174,9 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); if (serverListForRegion.length == 0) { - LOG.warn("{} {}: no servers available, this may be an empty region", - region.getShortNameToLog(), toCellSetString(regionCells)); + int regionSizeMb = getRegionSizeMbFunc.apply(i); + LOG.warn("{} ({} mb) {}: no servers available, this {} an empty region", + region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells), regionSizeMb == 0 ? "IS" : "IS NOT"); continue; } @@ -181,6 +188,7 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] LOG.info("Server {} has {} cells", i, cellsPerServer[i]); } + int currentMaxCellsPerServer = Arrays.stream(cellsPerServer).max().orElseGet(() -> bestCaseMaxCellsPerServer); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index f15d93179312..e86453fd39b7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -30,10 +30,13 @@ import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.util.function.Function; @Category({ MasterTests.class, SmallTests.class }) public class TestHubSpotCellCostFunction { + private static final Function ALL_REGIONS_SIZE_1_MB = x -> 1; + @ClassRule public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHubSpotCellCostFunction.class); @@ -105,7 +108,7 @@ public void testCostBalanced() { int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }); + new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }, ALL_REGIONS_SIZE_1_MB); assertEquals(0, cost); } @@ -116,7 +119,7 @@ public void testCostImbalanced() { int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }); + new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }, ALL_REGIONS_SIZE_1_MB); assertTrue(cost > 0); } From 4490e1b700a214e64f10fce680e53c29091edf4c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 16:49:42 -0400 Subject: [PATCH 027/126] Make clear if we skip any non-empty regions --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 13f7117cec1c..8613c73e0562 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -175,8 +175,14 @@ static int calculateCurrentCellCost( if (serverListForRegion.length == 0) { int regionSizeMb = getRegionSizeMbFunc.apply(i); - LOG.warn("{} ({} mb) {}: no servers available, this {} an empty region", - region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells), regionSizeMb == 0 ? "IS" : "IS NOT"); + if (regionSizeMb == 0) { + LOG.trace("{} ({} mb) {}: no servers available, this IS an empty region", + region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells)); + } else { + LOG.warn("{} ({} mb) {}: no servers available, this IS NOT an empty region", + region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells)); + } + continue; } From 580e31cc408fdfa0546291c8b3678667da81a4ed Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 17:26:37 -0400 Subject: [PATCH 028/126] Include this --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 8613c73e0562..94fe5649aea6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -104,6 +104,7 @@ private String snapshotState() { numUnassigned++; } + int regionSizeMb = super.cluster.getRegionSizeMB(i); String cellsInRegion = toCellSetString(toCells(region.getStartKey(), region.getEndKey(), numCells)); stateString.append("\n\t") @@ -114,7 +115,7 @@ private String snapshotState() { .append(Bytes.toHex(region.getEndKey())) .append(") ") .append(cellsInRegion) - .append(" -> ") + .append(" [").append(regionSizeMb).append(" mb] -> ") .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) .append( "(with ").append(assignedServers).append(" total candidates)"); } From ba831d9c75620c025de5dda57b50466f462e8bdb Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 17:35:13 -0400 Subject: [PATCH 029/126] If the first two bytes of start/stop are the same, the region holds exactly one cell --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 94fe5649aea6..080205c01c92 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -234,7 +234,8 @@ private static Set range(byte[] start, byte[] stop, short numCells) { if (stopCellId < 0 || stopCellId > numCells) { stopCellId = numCells; } - return IntStream.range(toCell(start), stopCellId) + short startCellId = toCell(start); + return IntStream.range(startCellId, Math.max(stopCellId, startCellId + 1)) .mapToObj(val -> (short) val).collect(Collectors.toSet()); } From cdd6e77afa299fa285ab753d11a7f156336cb74b Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 31 Oct 2024 17:46:07 -0400 Subject: [PATCH 030/126] Correct how we calculate the cells --- .../balancer/HubSpotCellCostFunction.java | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 080205c01c92..be1340bf6c28 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -235,8 +235,29 @@ private static Set range(byte[] start, byte[] stop, short numCells) { stopCellId = numCells; } short startCellId = toCell(start); - return IntStream.range(startCellId, Math.max(stopCellId, startCellId + 1)) - .mapToObj(val -> (short) val).collect(Collectors.toSet()); + + // if everything after the cell prefix is 0, this stop key is actually exclusive + boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); + + final IntStream cellStream; + if (isStopExclusive) { + cellStream = IntStream.range(startCellId, stopCellId); + } else { + // this is inclusive, but we have to make sure we include at least the startCellId, + // even if stopCell = startCell + 1 + cellStream = IntStream.rangeClosed(startCellId, Math.max(stopCellId, startCellId + 1)); + } + + return cellStream.mapToObj(val -> (short) val).collect(Collectors.toSet()); + } + + private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { + for (int i = offset; i < stop.length; i++) { + if (stop[i] != 0) { + return false; + } + } + return true; } private static short toCell(byte[] key) { From 426aeca086e59e53ba4e3c287a9da70343dc11ad Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 15:07:22 -0500 Subject: [PATCH 031/126] add a version identifier here --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index be1340bf6c28..9a37625a523a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -74,7 +74,7 @@ public class HubSpotCellCostFunction extends CostFunction { private String snapshotState() { StringBuilder stateString = new StringBuilder(); - stateString.append("HubSpotCellCostFunction config for ") + stateString.append("HubSpotCellCostFunction[0] config for ") .append( Optional.ofNullable(regions[0]) .map(RegionInfo::getTable) From 4a0a7fa76c82d195f0e3286ca06302cbdda1e758 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 15:32:45 -0500 Subject: [PATCH 032/126] This isn't really an edge case so much as the main case --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 9a37625a523a..f58dfb2e2660 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -23,6 +23,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; +import com.google.common.collect.ImmutableSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; @@ -236,6 +237,10 @@ private static Set range(byte[] start, byte[] stop, short numCells) { } short startCellId = toCell(start); + if (startCellId == stopCellId) { + return ImmutableSet.of(startCellId); + } + // if everything after the cell prefix is 0, this stop key is actually exclusive boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); From 867f49211449a45c612e4770a8f4a6955b9e83aa Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 15:44:04 -0500 Subject: [PATCH 033/126] Update logging & calcs --- .../balancer/HubSpotCellCostFunction.java | 144 +++++++++--------- .../balancer/TestHubSpotCellCostFunction.java | 2 +- 2 files changed, 76 insertions(+), 70 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index f58dfb2e2660..300f9784ee51 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,13 +17,13 @@ */ package org.apache.hadoop.hbase.master.balancer; +import com.google.common.collect.ImmutableSet; import java.util.Arrays; import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; -import com.google.common.collect.ImmutableSet; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; @@ -32,6 +32,7 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; @@ -61,7 +62,8 @@ public class HubSpotCellCostFunction extends CostFunction { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); } - @Override void prepare(BalancerClusterState cluster) { + @Override + void prepare(BalancerClusterState cluster) { numServers = cluster.numServers; numCells = calcNumCells(cluster.regions, MAX_CELL_COUNT); regions = cluster.regions; @@ -76,92 +78,93 @@ private String snapshotState() { StringBuilder stateString = new StringBuilder(); stateString.append("HubSpotCellCostFunction[0] config for ") - .append( - Optional.ofNullable(regions[0]) - .map(RegionInfo::getTable) - .map(TableName::getNameWithNamespaceInclAsString) - .orElseGet(() -> "N/A") - ).append(":") - .append("\n\tnumServers=").append(numServers) - .append("\n\tnumCells=").append(numCells) - .append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) + .append(Optional.ofNullable(regions[0]).map(RegionInfo::getTable) + .map(TableName::getNameWithNamespaceInclAsString).orElseGet(() -> "N/A")) + .append(":").append("\n\tnumServers=").append(numServers).append("\n\tnumCells=") + .append(numCells).append("\n\tmultiplier=").append(String.format("%.3f", getMultiplier())) .append("\n\tregions=\n["); - int numAssigned = 0; - int numUnassigned = 0; + if (LOG.isDebugEnabled()) { + int numAssigned = 0; + int numUnassigned = 0; + + for (int i = 0; i < regions.length; i++) { + RegionInfo region = regions[i]; + int[] regionLocations = this.regionLocations[i]; + Optional highestLocalityServerMaybe = + Optional.ofNullable(regionLocations).filter(locations -> locations.length > 0) + .map(locations -> locations[0]).map(serverIndex -> this.servers[serverIndex]); + int assignedServers = Optional.ofNullable(regionLocations) + .map(locations -> locations.length).orElseGet(() -> 0); + + if (assignedServers > 0) { + numAssigned++; + } else { + numUnassigned++; + } - for (int i = 0; i < regions.length; i++) { - RegionInfo region = regions[i]; - int[] regionLocations = this.regionLocations[i]; - Optional highestLocalityServerMaybe = - Optional.ofNullable(regionLocations).filter(locations -> locations.length > 0) - .map(locations -> locations[0]).map(serverIndex -> this.servers[serverIndex]); - int assignedServers = Optional.ofNullable(regionLocations).map(locations -> locations.length) - .orElseGet(() -> 0); - - if (assignedServers > 0) { - numAssigned++; - } else { - numUnassigned++; + int regionSizeMb = super.cluster.getRegionSizeMB(i); + String cellsInRegion = + toCellSetString(toCells(region.getStartKey(), region.getEndKey(), numCells)); + + stateString.append("\n\t").append(region.getShortNameToLog()).append("[") + .append(Bytes.toHex(region.getStartKey())).append(", ") + .append(Bytes.toHex(region.getEndKey())).append(") ").append(cellsInRegion).append(" [") + .append(regionSizeMb).append(" mb] -> ") + .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) + .append("(with ").append(assignedServers).append(" total candidates)"); } - int regionSizeMb = super.cluster.getRegionSizeMB(i); - String cellsInRegion = toCellSetString(toCells(region.getStartKey(), region.getEndKey(), numCells)); - - stateString.append("\n\t") - .append(region.getShortNameToLog()) - .append("[") - .append(Bytes.toHex(region.getStartKey())) - .append(", ") - .append(Bytes.toHex(region.getEndKey())) - .append(") ") - .append(cellsInRegion) - .append(" [").append(regionSizeMb).append(" mb] -> ") - .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) - .append( "(with ").append(assignedServers).append(" total candidates)"); + stateString.append("\n]\n\n\tAssigned regions: ").append(numAssigned) + .append("\n\tUnassigned regions: ").append(numUnassigned).append("\n"); + } else { + stateString.append("\n\t").append(regions.length).append(" regions\n]"); } - stateString.append("\n]\n\n\tAssigned regions: ").append(numAssigned) - .append("\n\tUnassigned regions: ").append(numUnassigned).append("\n"); return stateString.toString(); } private static String toCellSetString(Set cells) { - return cells.stream() - .map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); + return cells.stream().map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); } - @Override protected double cost() { - if (regions != null && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default")) { - LOG.info("Evaluating {}", snapshotState()); + @Override + protected double cost() { + double cost = calculateCurrentCellCost(numCells, numServers, regions, regionLocations, + super.cluster::getRegionSizeMB); + + if ( + regions != null && regions.length > 0 + && regions[0].getTable().getNamespaceAsString().equals("default") + ) { + LOG.info("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); } - return calculateCurrentCellCost(numCells, numServers, regions, regionLocations, super.cluster::getRegionSizeMB); + return cost; } - static int calculateCurrentCellCost( - short numCells, - int numServers, - RegionInfo[] regions, - int[][] regionLocations, - Function getRegionSizeMbFunc - ) { + static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions, + int[][] regionLocations, Function getRegionSizeMbFunc) { int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); - Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); + Preconditions.checkState(bestCaseMaxCellsPerServer > 0, + "Best case max cells per server must be > 0"); if (LOG.isDebugEnabled()) { Set tableAndNamespace = Arrays.stream(regions).map(RegionInfo::getTable) .map(table -> table.getNameAsString() + "." + table.getNamespaceAsString()) .collect(Collectors.toSet()); - LOG.debug("Calculating current cell cost for {} regions from these tables {}", regions.length, tableAndNamespace); + LOG.debug("Calculating current cell cost for {} regions from these tables {}", regions.length, + tableAndNamespace); } if (regions.length > 0 && !regions[0].getTable().getNamespaceAsString().equals("default")) { - LOG.info("Skipping cost calculation for non-default namespace on {}", regions[0].getTable().getNameWithNamespaceInclAsString()); + LOG.info("Skipping cost calculation for non-default namespace on {}", + regions[0].getTable().getNameWithNamespaceInclAsString()); return 0; } int[] cellsPerServer = new int[numServers]; + int maxCellsPerServer = bestCaseMaxCellsPerServer; for (int i = 0; i < regions.length; i++) { RegionInfo region = regions[i]; Preconditions.checkNotNull(region, "No region available at index " + i); @@ -170,14 +173,17 @@ static int calculateCurrentCellCost( } int[] serverListForRegion = regionLocations[i]; - Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); + Preconditions.checkNotNull(serverListForRegion, + "No region location available for " + region.getShortNameToLog()); Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); - LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); + if (LOG.isDebugEnabled()) { + LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); + } if (serverListForRegion.length == 0) { int regionSizeMb = getRegionSizeMbFunc.apply(i); - if (regionSizeMb == 0) { + if (regionSizeMb == 0 && LOG.isTraceEnabled()) { LOG.trace("{} ({} mb) {}: no servers available, this IS an empty region", region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells)); } else { @@ -190,17 +196,16 @@ static int calculateCurrentCellCost( int serverIndex = serverListForRegion[0]; cellsPerServer[serverIndex] += regionCells.size(); + maxCellsPerServer = Math.max(maxCellsPerServer, cellsPerServer[serverIndex]); } for (int i = 0; i < numServers; i++) { - LOG.info("Server {} has {} cells", i, cellsPerServer[i]); + if (LOG.isDebugEnabled()) { + LOG.debug("Server {} has {} cells", i, cellsPerServer[i]); + } } - - int currentMaxCellsPerServer = - Arrays.stream(cellsPerServer).max().orElseGet(() -> bestCaseMaxCellsPerServer); - - return Math.max(0, currentMaxCellsPerServer - bestCaseMaxCellsPerServer); + return maxCellsPerServer; } static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { @@ -224,7 +229,7 @@ private static byte[] padToTwoBytes(byte[] key, byte pad) { } if (key.length == 1) { - return new byte[] { pad, key[0]}; + return new byte[] { pad, key[0] }; } return key; @@ -266,7 +271,8 @@ private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { } private static short toCell(byte[] key) { - Preconditions.checkArgument(key != null && key.length >= 2, "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); + Preconditions.checkArgument(key != null && key.length >= 2, + "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); return Bytes.toShort(key, 0, 2); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index e86453fd39b7..b38607b54490 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.util.function.Function; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -30,7 +31,6 @@ import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.util.function.Function; @Category({ MasterTests.class, SmallTests.class }) public class TestHubSpotCellCostFunction { From 6120bc8ac14b81b5df2d4185feecc09fb9b67e26 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 15:45:18 -0500 Subject: [PATCH 034/126] Use shaded version --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 300f9784ee51..64cf2564bef2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import com.google.common.collect.ImmutableSet; import java.util.Arrays; import java.util.Optional; import java.util.Set; @@ -29,6 +28,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; From b3a664b8a37edc5b88a487f34ef4240524ac6139 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 17:06:24 -0500 Subject: [PATCH 035/126] emit the table name and namespace --- .../master/balancer/StochasticLoadBalancer.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 70e50826759f..def892447340 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -551,17 +551,17 @@ protected List balanceTable(TableName tableName, computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps); if (calculatedMaxSteps > maxSteps) { LOG.warn( - "calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "[{}] calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "maxSteps:{}. Hence load balancing may not work well. Setting parameter " + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue." + "(This config change does not require service restart)", - calculatedMaxSteps, maxSteps); + tableName.getNameWithNamespaceInclAsString(), calculatedMaxSteps, maxSteps); } } LOG.info( - "Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "[{}] Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "functionCost={} computedMaxSteps={}", - currentCost / sumMultiplier, functionCost(), computedMaxSteps); + tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), computedMaxSteps); final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. @@ -606,19 +606,19 @@ protected List balanceTable(TableName tableName, updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); plans = createRegionPlans(cluster); LOG.info( - "Finished computing new moving plan. Computation took {} ms" + "[{}] Finished computing new moving plan. Computation took {} ms" + " to try {} different iterations. Found a solution that moves " + "{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. funtionCost={}", - endTime - startTime, step, plans.size(), initCost / sumMultiplier, + tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), initCost / sumMultiplier, currentCost / sumMultiplier, functionCost()); sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); return plans; } LOG.info( - "Could not find a better moving plan. Tried {} different configurations in " + "[{}] Could not find a better moving plan. Tried {} different configurations in " + "{} ms, and did not find anything with an imbalance score less than {}", - step, endTime - startTime, initCost / sumMultiplier); + tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, initCost / sumMultiplier); return null; } From a3b9b886de040b5ff72613afd11ec71ae618cea9 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 5 Nov 2024 18:08:34 -0500 Subject: [PATCH 036/126] Switch to multidimensional array to reduce allocations --- .../balancer/HubSpotCellCostFunction.java | 78 ++++++++++++++----- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 64cf2564bef2..abcb17c178a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -51,6 +51,8 @@ public class HubSpotCellCostFunction extends CostFunction { private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now private static final short MAX_CELL_COUNT = 360; + private static final byte PAD_START_KEY = 0; + private static final byte PAD_END_KEY = -1; private int numServers; private short numCells; @@ -71,7 +73,9 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - LOG.info("Initializing {}", snapshotState()); + if (LOG.isDebugEnabled()) { + LOG.debug("Initializing {}", snapshotState()); + } } private String snapshotState() { @@ -136,8 +140,9 @@ protected double cost() { if ( regions != null && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") + && LOG.isDebugEnabled() ) { - LOG.info("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); + LOG.debug("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); } return cost; @@ -158,13 +163,14 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] } if (regions.length > 0 && !regions[0].getTable().getNamespaceAsString().equals("default")) { - LOG.info("Skipping cost calculation for non-default namespace on {}", - regions[0].getTable().getNameWithNamespaceInclAsString()); + if (LOG.isDebugEnabled()) { + LOG.debug("Skipping cost calculation for non-default namespace on {}", + regions[0].getTable().getNameWithNamespaceInclAsString()); + } return 0; } - int[] cellsPerServer = new int[numServers]; - int maxCellsPerServer = bestCaseMaxCellsPerServer; + boolean[][] serverHasCell = new boolean[numServers][numCells]; for (int i = 0; i < regions.length; i++) { RegionInfo region = regions[i]; Preconditions.checkNotNull(region, "No region available at index " + i); @@ -176,38 +182,72 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] Preconditions.checkNotNull(serverListForRegion, "No region location available for " + region.getShortNameToLog()); - Set regionCells = toCells(region.getStartKey(), region.getEndKey(), numCells); - if (LOG.isDebugEnabled()) { - LOG.debug("Region {} has {} cells", region.getEncodedName(), regionCells); - } + if (serverListForRegion.length == 0) { int regionSizeMb = getRegionSizeMbFunc.apply(i); if (regionSizeMb == 0 && LOG.isTraceEnabled()) { - LOG.trace("{} ({} mb) {}: no servers available, this IS an empty region", - region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells)); + LOG.trace("{} ({} mb): no servers available, this IS an empty region", + region.getShortNameToLog(), regionSizeMb); } else { - LOG.warn("{} ({} mb) {}: no servers available, this IS NOT an empty region", - region.getShortNameToLog(), regionSizeMb, toCellSetString(regionCells)); + LOG.warn("{} ({} mb): no servers available, this IS NOT an empty region", + region.getShortNameToLog(), regionSizeMb); } continue; } int serverIndex = serverListForRegion[0]; - cellsPerServer[serverIndex] += regionCells.size(); - maxCellsPerServer = Math.max(maxCellsPerServer, cellsPerServer[serverIndex]); + + setCellsForServer(serverHasCell[serverIndex], region.getStartKey(), region.getEndKey(), numCells); } + int maxCellsPerServer = 0; for (int i = 0; i < numServers; i++) { - if (LOG.isDebugEnabled()) { - LOG.debug("Server {} has {} cells", i, cellsPerServer[i]); + int cellsOnThisServer = 0; + for (int j = 0; j < numCells; j++) { + if (serverHasCell[i][j]) { + cellsOnThisServer++; + } } + + maxCellsPerServer = Math.max(maxCellsPerServer, cellsOnThisServer); } return maxCellsPerServer; } + private static void setCellsForServer( + boolean[] serverHasCell, + byte[] startKey, + byte[] endKey, + short numCells + ) { + byte[] start = padToTwoBytes(startKey, PAD_START_KEY); + byte[] stop = padToTwoBytes(endKey, PAD_END_KEY); + + short stopCellId = toCell(stop); + if (stopCellId < 0 || stopCellId > numCells) { + stopCellId = numCells; + } + short startCellId = toCell(start); + + if (startCellId == stopCellId) { + serverHasCell[startCellId] = true; + return; + } + + // if everything after the cell prefix is 0, this stop key is actually exclusive + boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); + for (short i = startCellId; i < stopCellId; i++) { + serverHasCell[i] = true; + } + + if (!isStopExclusive) { + serverHasCell[stopCellId] = true; + } + } + static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { if (regionInfos == null || regionInfos.length == 0) { return 0; @@ -263,7 +303,7 @@ private static Set range(byte[] start, byte[] stop, short numCells) { private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { for (int i = offset; i < stop.length; i++) { - if (stop[i] != 0) { + if (stop[i] != (byte) 0) { return false; } } From 45ed6067c60c4342646ab3d4d6c14c9afec8bc82 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 11:35:03 -0500 Subject: [PATCH 037/126] Optimize the balancer eval function --- .../balancer/HubSpotCellCostFunction.java | 80 +++++++++++-------- .../balancer/TestHubSpotCellCostFunction.java | 1 + 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index abcb17c178a2..873ad59210ca 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hbase.master.balancer; +import com.google.common.util.concurrent.AtomicDouble; import java.util.Arrays; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Function; @@ -28,12 +30,13 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; +import org.apache.hbase.thirdparty.com.google.common.math.Quantiles; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; /** @@ -139,8 +142,7 @@ protected double cost() { if ( regions != null && regions.length > 0 - && regions[0].getTable().getNamespaceAsString().equals("default") - && LOG.isDebugEnabled() + && regions[0].getTable().getNamespaceAsString().equals("default") && LOG.isDebugEnabled() ) { LOG.debug("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); } @@ -172,37 +174,33 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] boolean[][] serverHasCell = new boolean[numServers][numCells]; for (int i = 0; i < regions.length; i++) { - RegionInfo region = regions[i]; - Preconditions.checkNotNull(region, "No region available at index " + i); - if (!region.getTable().getNamespaceAsString().equals("default")) { - continue; + if (regions[i] == null) { + throw new IllegalStateException("No region available at index " + i); } - int[] serverListForRegion = regionLocations[i]; - Preconditions.checkNotNull(serverListForRegion, - "No region location available for " + region.getShortNameToLog()); - - + if (regionLocations[i] == null) { + throw new IllegalStateException( + "No server list available for region " + regions[i].getShortNameToLog()); + } - if (serverListForRegion.length == 0) { + if (regionLocations[i].length == 0) { int regionSizeMb = getRegionSizeMbFunc.apply(i); if (regionSizeMb == 0 && LOG.isTraceEnabled()) { LOG.trace("{} ({} mb): no servers available, this IS an empty region", - region.getShortNameToLog(), regionSizeMb); + regions[i].getShortNameToLog(), regionSizeMb); } else { LOG.warn("{} ({} mb): no servers available, this IS NOT an empty region", - region.getShortNameToLog(), regionSizeMb); + regions[i].getShortNameToLog(), regionSizeMb); } continue; } - int serverIndex = serverListForRegion[0]; - - setCellsForServer(serverHasCell[serverIndex], region.getStartKey(), region.getEndKey(), numCells); + setCellsForServer(serverHasCell[regionLocations[i][0]], regions[i].getStartKey(), + regions[i].getEndKey(), numCells); } - int maxCellsPerServer = 0; + int[] cellsPerServer = new int[numServers]; for (int i = 0; i < numServers; i++) { int cellsOnThisServer = 0; for (int j = 0; j < numCells; j++) { @@ -211,38 +209,48 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] } } - maxCellsPerServer = Math.max(maxCellsPerServer, cellsOnThisServer); + cellsPerServer[i] = cellsOnThisServer; } - return maxCellsPerServer; + Map stats = Quantiles.scale(100) + .indexes(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(cellsPerServer); + + AtomicDouble totalCost = new AtomicDouble(0.0); + stats.forEach((percentile, value) -> totalCost.addAndGet(value)); + + return (int) Math + .round(Math.max(0, totalCost.get() / stats.size() - bestCaseMaxCellsPerServer)); } - private static void setCellsForServer( - boolean[] serverHasCell, - byte[] startKey, - byte[] endKey, - short numCells - ) { - byte[] start = padToTwoBytes(startKey, PAD_START_KEY); - byte[] stop = padToTwoBytes(endKey, PAD_END_KEY); + private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, byte[] endKey, + short numCells) { + short startCellId = (startKey == null || startKey.length == 0) + ? 0 + : (startKey.length >= 2 + ? Bytes.toShort(startKey, 0, 2) + : Bytes.toShort(new byte[] { 0, startKey[0] })); + short stopCellId = (endKey == null || endKey.length == 0) + ? (short) (numCells - 1) + : (endKey.length >= 2 + ? Bytes.toShort(endKey, 0, 2) + : Bytes.toShort(new byte[] { -1, endKey[0] })); - short stopCellId = toCell(stop); if (stopCellId < 0 || stopCellId > numCells) { stopCellId = numCells; } - short startCellId = toCell(start); if (startCellId == stopCellId) { serverHasCell[startCellId] = true; return; } - // if everything after the cell prefix is 0, this stop key is actually exclusive - boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); for (short i = startCellId; i < stopCellId; i++) { serverHasCell[i] = true; } + // if everything after the cell prefix is 0, this stop key is actually exclusive + boolean isStopExclusive = + endKey != null && endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2); if (!isStopExclusive) { serverHasCell[stopCellId] = true; } @@ -311,8 +319,10 @@ private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { } private static short toCell(byte[] key) { - Preconditions.checkArgument(key != null && key.length >= 2, - "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); + if (key == null || key.length < 2) { + throw new IllegalArgumentException( + "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); + } return Bytes.toShort(key, 0, 2); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index b38607b54490..1d3d95a954d2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -129,4 +129,5 @@ private RegionInfo buildRegionInfo(Short startCell, Short stopCell) { .setEndKey(stopCell == null ? null : Bytes.toBytes(stopCell)).build(); return result; } + } From 941d2d2f78211332d270228688a360aa6863f06b Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 11:39:32 -0500 Subject: [PATCH 038/126] Deps --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 873ad59210ca..5c93d555f783 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import com.google.common.util.concurrent.AtomicDouble; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -30,6 +29,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.util.concurrent.AtomicDouble; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; From 0123942ee5d0d829440143283cf154390b1e022a Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 14:30:49 -0500 Subject: [PATCH 039/126] Add custom step generator for stochastic load to prioritize shuffling regions from high cell count servers to low cell count servers --- .../HubSpotCellBasedCandidateGenerator.java | 128 ++++++++++++++++++ .../balancer/HubSpotCellCostFunction.java | 15 +- .../balancer/StochasticLoadBalancer.java | 4 +- 3 files changed, 142 insertions(+), 5 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java new file mode 100644 index 000000000000..b93e593bc2f7 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; + +@InterfaceAudience.Private +class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { + + private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); + + @Override + BalanceAction generate(BalancerClusterState cluster) { + int[][] regionsPerServer = cluster.regionsPerServer; + + int serverWithMostCells = -1; + int mostCellsPerServerSoFar = 0; + double mostCellsReservoirRandom = -1; + + int serverWithFewestCells = -1; + int fewestCellsPerServerSoFar = 360; + double fewestCellsReservoirRandom = -1; + + for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { + int cellsOnServer = numCells(cluster, regionsPerServer[serverIndex]); + + if (cellsOnServer > mostCellsPerServerSoFar) { + mostCellsPerServerSoFar = cellsOnServer; + mostCellsReservoirRandom = -1; + } else if ( cellsOnServer == mostCellsPerServerSoFar) { + // we don't know how many servers have the same cell count, so use a simplified online + // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) + double maxCellRandom = ThreadLocalRandom.current().nextDouble(); + if (maxCellRandom > mostCellsReservoirRandom) { + serverWithMostCells = serverIndex; + mostCellsReservoirRandom = maxCellRandom; + } + } + + if (cellsOnServer < fewestCellsPerServerSoFar) { + fewestCellsPerServerSoFar = cellsOnServer; + fewestCellsReservoirRandom = -1; + } else if ( cellsOnServer == fewestCellsPerServerSoFar) { + // we don't know how many servers have the same cell count, so use a simplified online + // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) + double minCellRandom = ThreadLocalRandom.current().nextDouble(); + if (minCellRandom > fewestCellsReservoirRandom) { + serverWithFewestCells = serverIndex; + fewestCellsReservoirRandom = minCellRandom; + } + } + } + + BalanceAction action = + maybeMoveRegionFromHeaviestToLightest(cluster, serverWithMostCells, serverWithFewestCells); + + if (LOG.isDebugEnabled()) { + LOG.debug("Attempting {} ({} cells --> {} cells)", + action.toString(), + mostCellsPerServerSoFar, + fewestCellsPerServerSoFar + ); + } + + return action; + } + + private int numCells(BalancerClusterState cluster, int[] regions) { + Set cells = new HashSet<>(regions.length); + + for (int regionIndex : regions) { + RegionInfo region = cluster.regions[regionIndex]; + byte[] startKey = region.getStartKey(); + byte[] endKey = region.getEndKey(); + + short startCellId = (startKey == null || startKey.length == 0) + ? 0 + : (startKey.length >= 2 + ? Bytes.toShort(startKey, 0, 2) + : Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) + ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) + : (endKey.length >= 2 + ? Bytes.toShort(endKey, 0, 2) + : Bytes.toShort(new byte[] { -1, endKey[0] })); + + for (short i = startCellId; i < endCellId; i++) { + cells.add(i); + } + + if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { + cells.add(endCellId); + } + } + + return cells.size(); + } + + BalanceAction maybeMoveRegionFromHeaviestToLightest(BalancerClusterState cluster, int fromServer, int toServer) { + if (fromServer < 0 || toServer < 0) { + return BalanceAction.NULL_ACTION; + } + + return getAction(fromServer, pickRandomRegion(cluster, fromServer, 0.5), toServer, -1); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 5c93d555f783..b7990cefd19b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -53,7 +53,7 @@ public class HubSpotCellCostFunction extends CostFunction { "hbase.master.balancer.stochastic.hubspotCellCost"; private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now - private static final short MAX_CELL_COUNT = 360; + static final short MAX_CELL_COUNT = 360; private static final byte PAD_START_KEY = 0; private static final byte PAD_END_KEY = -1; @@ -249,13 +249,15 @@ private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, } // if everything after the cell prefix is 0, this stop key is actually exclusive - boolean isStopExclusive = - endKey != null && endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2); - if (!isStopExclusive) { + if (!isStopExclusive(endKey)) { serverHasCell[stopCellId] = true; } } + static boolean isStopExclusive(byte[] endKey) { + return endKey != null && endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2); + } + static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { if (regionInfos == null || regionInfos.length == 0) { return 0; @@ -326,4 +328,9 @@ private static short toCell(byte[] key) { return Bytes.toShort(key, 0, 2); } + + @Override + public final void updateWeight(double[] weights) { + weights[StochasticLoadBalancer.GeneratorType.HUBSPOT_CELL.ordinal()] += cost(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index def892447340..adf746f1d4a5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -147,7 +147,8 @@ public enum GeneratorType { RANDOM, LOAD, LOCALITY, - RACK + RACK, + HUBSPOT_CELL } private double[] weightsOfGenerators; @@ -229,6 +230,7 @@ List getCandidateGenerators() { protected List createCandidateGenerators() { List candidateGenerators = new ArrayList(4); candidateGenerators.add(GeneratorType.RANDOM.ordinal(), new RandomCandidateGenerator()); + candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); candidateGenerators.add(GeneratorType.LOAD.ordinal(), new LoadCandidateGenerator()); candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), From 43fb3da588d1b6b583413838c93a993e6a1d8a23 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 15:32:52 -0500 Subject: [PATCH 040/126] Use our own candidate generator for the stochastic balancer --- .../HubSpotCellBasedCandidateGenerator.java | 160 +++++++++++++----- 1 file changed, 114 insertions(+), 46 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index b93e593bc2f7..f1ff49caedbf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -17,39 +17,42 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.ThreadLocalRandom; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; +import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; -@InterfaceAudience.Private -class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { +@InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { - private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); + private static final Logger LOG = + LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); + private static final double CHANCE_OF_NOOP = 0.2; - @Override - BalanceAction generate(BalancerClusterState cluster) { + @Override BalanceAction generate(BalancerClusterState cluster) { + cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; int serverWithMostCells = -1; int mostCellsPerServerSoFar = 0; double mostCellsReservoirRandom = -1; - int serverWithFewestCells = -1; - int fewestCellsPerServerSoFar = 360; - double fewestCellsReservoirRandom = -1; - for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { int cellsOnServer = numCells(cluster, regionsPerServer[serverIndex]); if (cellsOnServer > mostCellsPerServerSoFar) { mostCellsPerServerSoFar = cellsOnServer; mostCellsReservoirRandom = -1; - } else if ( cellsOnServer == mostCellsPerServerSoFar) { + } else if (cellsOnServer == mostCellsPerServerSoFar) { // we don't know how many servers have the same cell count, so use a simplified online // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) double maxCellRandom = ThreadLocalRandom.current().nextDouble(); @@ -58,30 +61,12 @@ BalanceAction generate(BalancerClusterState cluster) { mostCellsReservoirRandom = maxCellRandom; } } - - if (cellsOnServer < fewestCellsPerServerSoFar) { - fewestCellsPerServerSoFar = cellsOnServer; - fewestCellsReservoirRandom = -1; - } else if ( cellsOnServer == fewestCellsPerServerSoFar) { - // we don't know how many servers have the same cell count, so use a simplified online - // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) - double minCellRandom = ThreadLocalRandom.current().nextDouble(); - if (minCellRandom > fewestCellsReservoirRandom) { - serverWithFewestCells = serverIndex; - fewestCellsReservoirRandom = minCellRandom; - } - } } - BalanceAction action = - maybeMoveRegionFromHeaviestToLightest(cluster, serverWithMostCells, serverWithFewestCells); + BalanceAction action = maybeMoveRegion(cluster, serverWithMostCells); if (LOG.isDebugEnabled()) { - LOG.debug("Attempting {} ({} cells --> {} cells)", - action.toString(), - mostCellsPerServerSoFar, - fewestCellsPerServerSoFar - ); + LOG.debug("Attempting {} ({} cells max)", action.toString(), mostCellsPerServerSoFar); } return action; @@ -95,16 +80,16 @@ private int numCells(BalancerClusterState cluster, int[] regions) { byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - short startCellId = (startKey == null || startKey.length == 0) - ? 0 - : (startKey.length >= 2 - ? Bytes.toShort(startKey, 0, 2) - : Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) - ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) - : (endKey.length >= 2 - ? Bytes.toShort(endKey, 0, 2) - : Bytes.toShort(new byte[] { -1, endKey[0] })); + short startCellId = (startKey == null || startKey.length == 0) ? + 0 : + (startKey.length >= 2 ? + Bytes.toShort(startKey, 0, 2) : + Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) ? + (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (endKey.length >= 2 ? + Bytes.toShort(endKey, 0, 2) : + Bytes.toShort(new byte[] { -1, endKey[0] })); for (short i = startCellId; i < endCellId; i++) { cells.add(i); @@ -118,11 +103,94 @@ private int numCells(BalancerClusterState cluster, int[] regions) { return cells.size(); } - BalanceAction maybeMoveRegionFromHeaviestToLightest(BalancerClusterState cluster, int fromServer, int toServer) { - if (fromServer < 0 || toServer < 0) { + BalanceAction maybeMoveRegion(BalancerClusterState cluster, int fromServer) { + if (fromServer < 0 || cluster.regionsPerServer[fromServer].length == 0 + || ThreadLocalRandom.current().nextFloat() < CHANCE_OF_NOOP) { return BalanceAction.NULL_ACTION; } - return getAction(fromServer, pickRandomRegion(cluster, fromServer, 0.5), toServer, -1); + Multimap cellsByRegionOnSource = + computeCellsByRegion(cluster.regionsPerServer[fromServer], cluster.regions); + Map countOfRegionsForCellOnSource = new HashMap<>(); + cellsByRegionOnSource.forEach( + (region, cell) -> countOfRegionsForCellOnSource.computeIfAbsent(cell, + ignored -> new AtomicInteger()).incrementAndGet()); + + int regionWithFewestInstancesOfCellsPresent = + cellsByRegionOnSource.keySet().stream().min(Comparator.comparing(region -> { + return cellsByRegionOnSource.get(region).stream().mapToInt(cell -> { + return countOfRegionsForCellOnSource.get(cell).get(); + }).max().orElseGet(() -> 0); + })).orElseGet(() -> -1); + + int targetServer = computeBestServerToReceiveRegion(cluster, fromServer, + regionWithFewestInstancesOfCellsPresent); + + return getAction(fromServer, regionWithFewestInstancesOfCellsPresent, targetServer, -1); + } + + private int computeBestServerToReceiveRegion(BalancerClusterState cluster, int currentServer, + int region) { + // This is the lightest loaded (by count), but we want to keep cell collocation to a minimum + int target = cluster.serverIndicesSortedByRegionCount[0]; + + Set cellsOnTransferRegion = + new HashSet<>(computeCellsByRegion(new int[] { region }, cluster.regions).get(region)); + + // so, we'll make a best effort to see if we can find a reasonably loaded server that already + // has the cells for this region + for (int i = 0; i < cluster.serverIndicesSortedByRegionCount.length; i++) { + int server = cluster.serverIndicesSortedByRegionCount[i]; + + if (server == currentServer) { + continue; + } + + int[] regionsOnCandidate = cluster.regionsPerServer[server]; + if (regionsOnCandidate.length > 2 * cluster.regionsPerServer[currentServer].length) { + // don't try to transfer a region to a server that already has more than 2x ours + break; + } + + Multimap possibleTargetCellsByRegion = + computeCellsByRegion(regionsOnCandidate, cluster.regions); + // if the candidate server has all the cells we need, this transfer can only improve isolation + if (new HashSet<>(possibleTargetCellsByRegion.values()).containsAll(cellsOnTransferRegion)) { + target = server; + break; + } + } + + return target; + } + + private Multimap computeCellsByRegion(int[] regionIndices, RegionInfo[] regions) { + ImmutableMultimap.Builder resultBuilder = ImmutableMultimap.builder(); + for (int regionIndex : regionIndices) { + RegionInfo region = regions[regionIndex]; + + byte[] startKey = region.getStartKey(); + byte[] endKey = region.getEndKey(); + + short startCellId = (startKey == null || startKey.length == 0) ? + 0 : + (startKey.length >= 2 ? + Bytes.toShort(startKey, 0, 2) : + Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) ? + (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (endKey.length >= 2 ? + Bytes.toShort(endKey, 0, 2) : + Bytes.toShort(new byte[] { -1, endKey[0] })); + + for (short i = startCellId; i < endCellId; i++) { + resultBuilder.put(regionIndex, i); + } + + if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { + resultBuilder.put(regionIndex, endCellId); + } + } + return resultBuilder.build(); } } From 3d45b4df3e51b8c725a5f78c5ca6a168301e775c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 16:11:58 -0500 Subject: [PATCH 041/126] Yep that sure is a 5 --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index adf746f1d4a5..87967b58daff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -228,7 +228,7 @@ List getCandidateGenerators() { } protected List createCandidateGenerators() { - List candidateGenerators = new ArrayList(4); + List candidateGenerators = new ArrayList(5); candidateGenerators.add(GeneratorType.RANDOM.ordinal(), new RandomCandidateGenerator()); candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); candidateGenerators.add(GeneratorType.LOAD.ordinal(), new LoadCandidateGenerator()); From cf1064d11d6ce574b9b6515cf10fc4e75ebaa8fb Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 6 Nov 2024 16:17:26 -0500 Subject: [PATCH 042/126] It has to be the order of the ordinal of course --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 87967b58daff..ece9e2833bd5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -230,11 +230,11 @@ List getCandidateGenerators() { protected List createCandidateGenerators() { List candidateGenerators = new ArrayList(5); candidateGenerators.add(GeneratorType.RANDOM.ordinal(), new RandomCandidateGenerator()); - candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); candidateGenerators.add(GeneratorType.LOAD.ordinal(), new LoadCandidateGenerator()); candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), new RegionReplicaRackCandidateGenerator()); + candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); return candidateGenerators; } From 04fecfe79c911b01cb9dd4088a009bb9727c2609 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 10:19:56 -0500 Subject: [PATCH 043/126] Correct reservoir sampling seed, and use boolean[] instead of set --- .../HubSpotCellBasedCandidateGenerator.java | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index f1ff49caedbf..663df9dd8280 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -39,6 +39,10 @@ private static final double CHANCE_OF_NOOP = 0.2; @Override BalanceAction generate(BalancerClusterState cluster) { + if (LOG.isTraceEnabled()) { + LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions", + cluster.regionsPerServer.length, cluster.regions.length); + } cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; @@ -49,12 +53,12 @@ for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { int cellsOnServer = numCells(cluster, regionsPerServer[serverIndex]); + // we don't know how many servers have the same cell count, so use a simplified online + // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) if (cellsOnServer > mostCellsPerServerSoFar) { mostCellsPerServerSoFar = cellsOnServer; - mostCellsReservoirRandom = -1; + mostCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); } else if (cellsOnServer == mostCellsPerServerSoFar) { - // we don't know how many servers have the same cell count, so use a simplified online - // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) double maxCellRandom = ThreadLocalRandom.current().nextDouble(); if (maxCellRandom > mostCellsReservoirRandom) { serverWithMostCells = serverIndex; @@ -73,7 +77,7 @@ } private int numCells(BalancerClusterState cluster, int[] regions) { - Set cells = new HashSet<>(regions.length); + boolean[] cellsPresent = new boolean[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int regionIndex : regions) { RegionInfo region = cluster.regions[regionIndex]; @@ -92,15 +96,22 @@ private int numCells(BalancerClusterState cluster, int[] regions) { Bytes.toShort(new byte[] { -1, endKey[0] })); for (short i = startCellId; i < endCellId; i++) { - cells.add(i); + cellsPresent[i] = true; } if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { - cells.add(endCellId); + cellsPresent[endCellId] = true; + } + } + + int count = 0; + for (boolean hasCell : cellsPresent) { + if (hasCell) { + count++; } } - return cells.size(); + return count; } BalanceAction maybeMoveRegion(BalancerClusterState cluster, int fromServer) { From 35adc466a55996b7e99efd10064afb344e4c9031 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 11:16:06 -0500 Subject: [PATCH 044/126] Cost is invoked 2-3 times per use, memoize it --- .../balancer/HubSpotCellCostFunction.java | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index b7990cefd19b..e8041e7b2047 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -21,7 +21,9 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; +import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; @@ -29,6 +31,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; import org.apache.hbase.thirdparty.com.google.common.util.concurrent.AtomicDouble; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -57,14 +60,19 @@ public class HubSpotCellCostFunction extends CostFunction { private static final byte PAD_START_KEY = 0; private static final byte PAD_END_KEY = -1; + private final AtomicBoolean isCostUpToDate; + private int numServers; private short numCells; private ServerName[] servers; private RegionInfo[] regions; // not necessarily sorted private int[][] regionLocations; + private Supplier memoizedCostSupplier; + HubSpotCellCostFunction(Configuration conf) { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); + this.isCostUpToDate = new AtomicBoolean(false); } @Override @@ -76,6 +84,9 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); + this.isCostUpToDate.set(false); + this.memoizedCostSupplier = Suppliers.memoize(() -> 0.0); + if (LOG.isDebugEnabled()) { LOG.debug("Initializing {}", snapshotState()); } @@ -137,8 +148,14 @@ private static String toCellSetString(Set cells) { @Override protected double cost() { - double cost = calculateCurrentCellCost(numCells, numServers, regions, regionLocations, - super.cluster::getRegionSizeMB); + if (isCostUpToDate.get()) { + return memoizedCostSupplier.get(); + } + + double cost = calculateCurrentCellCost(numCells, numServers, regions, regionLocations, super.cluster::getRegionSizeMB); + + this.memoizedCostSupplier = Suppliers.memoize(() -> cost); + this.isCostUpToDate.set(true); if ( regions != null && regions.length > 0 From 3a744abbbb0848dbbc9582216c671b0d1c5ba382 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 11:18:18 -0500 Subject: [PATCH 045/126] Filter out non-default regions --- .../balancer/HubSpotCellBasedCandidateGenerator.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 663df9dd8280..e18dedac1e0a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -81,6 +81,11 @@ private int numCells(BalancerClusterState cluster, int[] regions) { for (int regionIndex : regions) { RegionInfo region = cluster.regions[regionIndex]; + + if (!region.getTable().getNamespaceAsString().equals("default")) { + continue; + } + byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); @@ -180,6 +185,10 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio for (int regionIndex : regionIndices) { RegionInfo region = regions[regionIndex]; + if (!region.getTable().getNamespaceAsString().equals("default")) { + continue; + } + byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); From fe67705d41ca5aaf546de413adda9eea3a3e4fc6 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 11:21:40 -0500 Subject: [PATCH 046/126] Prevent being out of bounds --- .../HubSpotCellBasedCandidateGenerator.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index e18dedac1e0a..25409052fef2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -100,6 +100,14 @@ private int numCells(BalancerClusterState cluster, int[] regions) { Bytes.toShort(endKey, 0, 2) : Bytes.toShort(new byte[] { -1, endKey[0] })); + if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { + startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + } + + if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { + endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + } + for (short i = startCellId; i < endCellId; i++) { cellsPresent[i] = true; } @@ -203,6 +211,14 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio Bytes.toShort(endKey, 0, 2) : Bytes.toShort(new byte[] { -1, endKey[0] })); + if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { + startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + } + + if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { + endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + } + for (short i = startCellId; i < endCellId; i++) { resultBuilder.put(regionIndex, i); } From 1d9b7ef1d17c7a6cb2c927de86f190f64d8c0701 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 12:03:44 -0500 Subject: [PATCH 047/126] Correct off-by-1 --- .../balancer/HubSpotCellBasedCandidateGenerator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 25409052fef2..22c14300ba19 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -101,11 +101,11 @@ private int numCells(BalancerClusterState cluster, int[] regions) { Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } for (short i = startCellId; i < endCellId; i++) { @@ -212,11 +212,11 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT; + endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } for (short i = startCellId; i < endCellId; i++) { From 0ac31f5256d8e68a3f3c2adce8d75d3085a780cd Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 12:11:41 -0500 Subject: [PATCH 048/126] Add guards here --- .../balancer/HubSpotCellBasedCandidateGenerator.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 22c14300ba19..f7e76c1cc004 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -80,6 +80,10 @@ private int numCells(BalancerClusterState cluster, int[] regions) { boolean[] cellsPresent = new boolean[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int regionIndex : regions) { + if (regionIndex < 0 || regionIndex > regions.length) { + continue; + } + RegionInfo region = cluster.regions[regionIndex]; if (!region.getTable().getNamespaceAsString().equals("default")) { @@ -191,6 +195,10 @@ private int computeBestServerToReceiveRegion(BalancerClusterState cluster, int c private Multimap computeCellsByRegion(int[] regionIndices, RegionInfo[] regions) { ImmutableMultimap.Builder resultBuilder = ImmutableMultimap.builder(); for (int regionIndex : regionIndices) { + if (regionIndex < 0 || regionIndex > regions.length) { + continue; + } + RegionInfo region = regions[regionIndex]; if (!region.getTable().getNamespaceAsString().equals("default")) { From 06c2ad5a5cbe9cd63b1f334a8268bfc6ffd9fbb9 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 12:12:58 -0500 Subject: [PATCH 049/126] Include the tables --- .../master/balancer/HubSpotCellBasedCandidateGenerator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index f7e76c1cc004..6ba7fd7cc07d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -40,8 +40,8 @@ @Override BalanceAction generate(BalancerClusterState cluster) { if (LOG.isTraceEnabled()) { - LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions", - cluster.regionsPerServer.length, cluster.regions.length); + LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions for tables {}", + cluster.regionsPerServer.length, cluster.regions.length, cluster.tables); } cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; From d25a9ac639cec8d80939cdf954902acc61113207 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 13:00:43 -0500 Subject: [PATCH 050/126] Do not emit null actions --- .../master/balancer/HubSpotCellBasedCandidateGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 6ba7fd7cc07d..fe884b3f6056 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -69,7 +69,7 @@ BalanceAction action = maybeMoveRegion(cluster, serverWithMostCells); - if (LOG.isDebugEnabled()) { + if (LOG.isDebugEnabled() && action.getType() != BalanceAction.Type.NULL) { LOG.debug("Attempting {} ({} cells max)", action.toString(), mostCellsPerServerSoFar); } From 235fe3420d0dadec303f8edfb597be25aeb75d9c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 14:07:28 -0500 Subject: [PATCH 051/126] only use these balancer tools on objects-3 --- .../master/balancer/HubSpotCellBasedCandidateGenerator.java | 5 +++++ .../hbase/master/balancer/HubSpotCellCostFunction.java | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index fe884b3f6056..6799c625912b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -43,6 +43,11 @@ LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions for tables {}", cluster.regionsPerServer.length, cluster.regions.length, cluster.tables); } + + if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { + return BalanceAction.NULL_ACTION; + } + cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index e8041e7b2047..b4962132bede 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -92,6 +92,10 @@ void prepare(BalancerClusterState cluster) { } } + @Override boolean isNeeded() { + return cluster.tables.stream().anyMatch(name -> name.contains("objects-3")); + } + private String snapshotState() { StringBuilder stateString = new StringBuilder(); From 351759889cdd4b9fce90c9fdae247b695c3c84fe Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 14:14:38 -0500 Subject: [PATCH 052/126] This is a bug - only add the cost of the function if it's needed --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index ece9e2833bd5..a2db3a8e1f37 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -792,7 +792,9 @@ void initCosts(BalancerClusterState cluster) { weightsOfGenerators = new double[this.candidateGenerators.size()]; for (CostFunction c : costFunctions) { c.prepare(cluster); - c.updateWeight(weightsOfGenerators); + if (c.isNeeded()) { + c.updateWeight(weightsOfGenerators); + } } } From c4c62968b277f0ec982272d87de2622d2ff719e2 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 15:19:39 -0500 Subject: [PATCH 053/126] Add a lot of trace logging --- .../HubSpotCellBasedCandidateGenerator.java | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 6799c625912b..9c065c97e500 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -39,15 +39,15 @@ private static final double CHANCE_OF_NOOP = 0.2; @Override BalanceAction generate(BalancerClusterState cluster) { + if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { + return BalanceAction.NULL_ACTION; + } + if (LOG.isTraceEnabled()) { LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions for tables {}", cluster.regionsPerServer.length, cluster.regions.length, cluster.tables); } - if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { - return BalanceAction.NULL_ACTION; - } - cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; @@ -136,14 +136,30 @@ private int numCells(BalancerClusterState cluster, int[] regions) { return count; } - BalanceAction maybeMoveRegion(BalancerClusterState cluster, int fromServer) { - if (fromServer < 0 || cluster.regionsPerServer[fromServer].length == 0 - || ThreadLocalRandom.current().nextFloat() < CHANCE_OF_NOOP) { + BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCells) { + if (serverWithMostCells < 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("No server with cells found"); + } + return BalanceAction.NULL_ACTION; + } + + if (cluster.regionsPerServer[serverWithMostCells].length == 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("{} has no regions", serverWithMostCells); + } + return BalanceAction.NULL_ACTION; + } + + if (ThreadLocalRandom.current().nextFloat() < CHANCE_OF_NOOP) { + if (LOG.isTraceEnabled()) { + LOG.trace("Randomly taking no action. Chaos! Mwahahaha!"); + } return BalanceAction.NULL_ACTION; } Multimap cellsByRegionOnSource = - computeCellsByRegion(cluster.regionsPerServer[fromServer], cluster.regions); + computeCellsByRegion(cluster.regionsPerServer[serverWithMostCells], cluster.regions); Map countOfRegionsForCellOnSource = new HashMap<>(); cellsByRegionOnSource.forEach( (region, cell) -> countOfRegionsForCellOnSource.computeIfAbsent(cell, @@ -156,10 +172,14 @@ BalanceAction maybeMoveRegion(BalancerClusterState cluster, int fromServer) { }).max().orElseGet(() -> 0); })).orElseGet(() -> -1); - int targetServer = computeBestServerToReceiveRegion(cluster, fromServer, + int targetServer = computeBestServerToReceiveRegion(cluster, serverWithMostCells, regionWithFewestInstancesOfCellsPresent); - return getAction(fromServer, regionWithFewestInstancesOfCellsPresent, targetServer, -1); + if (LOG.isTraceEnabled()) { + LOG.trace("Moving s{}.r{} to {}", serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer); + } + + return getAction(serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer, -1); } private int computeBestServerToReceiveRegion(BalancerClusterState cluster, int currentServer, From 1b1bc4430f1ecc538860c81de437c18e5b97ec7e Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 16:10:33 -0500 Subject: [PATCH 054/126] More logging --- .../HubSpotCellBasedCandidateGenerator.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 9c065c97e500..1a650888714c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -24,6 +25,7 @@ import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; @@ -56,7 +58,15 @@ double mostCellsReservoirRandom = -1; for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { - int cellsOnServer = numCells(cluster, regionsPerServer[serverIndex]); + int[] regionsForServer = regionsPerServer[serverIndex]; + int cellsOnServer = numCells(cluster, regionsForServer); + + if (LOG.isTraceEnabled()) { + LOG.trace("Server {} has {} regions, which have {} cells", + serverIndex, + Arrays.stream(regionsForServer).boxed().sorted().collect( + Collectors.toList()), cellsOnServer); + } // we don't know how many servers have the same cell count, so use a simplified online // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) @@ -92,6 +102,9 @@ private int numCells(BalancerClusterState cluster, int[] regions) { RegionInfo region = cluster.regions[regionIndex]; if (!region.getTable().getNamespaceAsString().equals("default")) { + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping region {} because it's not in the default namespace", region.getTable().getNameWithNamespaceInclAsString()); + } continue; } From 6ac45baa7c5f97d9c50251f51a73f8001bb2f32d Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 16:57:39 -0500 Subject: [PATCH 055/126] Trace enough to figure out why cell count is 0 --- .../HubSpotCellBasedCandidateGenerator.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 1a650888714c..7746414a8f82 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -59,7 +59,7 @@ for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { int[] regionsForServer = regionsPerServer[serverIndex]; - int cellsOnServer = numCells(cluster, regionsForServer); + int cellsOnServer = numCells(cluster, serverIndex, regionsForServer); if (LOG.isTraceEnabled()) { LOG.trace("Server {} has {} regions, which have {} cells", @@ -91,7 +91,7 @@ return action; } - private int numCells(BalancerClusterState cluster, int[] regions) { + private int numCells(BalancerClusterState cluster, int serverIndex, int[] regions) { boolean[] cellsPresent = new boolean[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int regionIndex : regions) { @@ -111,6 +111,10 @@ private int numCells(BalancerClusterState cluster, int[] regions) { byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); + if (LOG.isTraceEnabled()) { + LOG.trace("{} [{}]: eval {} - {}", serverIndex, regionIndex, Bytes.toHex(startKey), Bytes.toHex(endKey)); + } + short startCellId = (startKey == null || startKey.length == 0) ? 0 : (startKey.length >= 2 ? @@ -130,11 +134,21 @@ private int numCells(BalancerClusterState cluster, int[] regions) { endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } + if (LOG.isTraceEnabled()) { + LOG.trace("Evaluating {}[{}]: cells {} - {}", serverIndex, regionIndex, startCellId, endCellId); + } + for (short i = startCellId; i < endCellId; i++) { + if (LOG.isTraceEnabled()) { + LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, i); + } cellsPresent[i] = true; } if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { + if (LOG.isTraceEnabled()) { + LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, endKey); + } cellsPresent[endCellId] = true; } } From 583aca97c3f1e6896a613010abfe90de15e2e764 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 17:49:04 -0500 Subject: [PATCH 056/126] Fix the subtle array access bug --- .../HubSpotCellBasedCandidateGenerator.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 7746414a8f82..20bf42931ccd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -91,11 +91,14 @@ return action; } - private int numCells(BalancerClusterState cluster, int serverIndex, int[] regions) { + private int numCells(BalancerClusterState cluster, int serverIndex, int[] regionsForServer) { boolean[] cellsPresent = new boolean[HubSpotCellCostFunction.MAX_CELL_COUNT]; - for (int regionIndex : regions) { - if (regionIndex < 0 || regionIndex > regions.length) { + for (int regionIndex : regionsForServer) { + if (regionIndex < 0 || regionIndex > cluster.regions.length) { + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping region {} because it's <0 or >{}", regionIndex, regionsForServer.length); + } continue; } @@ -135,19 +138,19 @@ private int numCells(BalancerClusterState cluster, int serverIndex, int[] region } if (LOG.isTraceEnabled()) { - LOG.trace("Evaluating {}[{}]: cells {} - {}", serverIndex, regionIndex, startCellId, endCellId); + LOG.trace("Evaluating {} [{}]: cells {} - {}", serverIndex, regionIndex, startCellId, endCellId); } for (short i = startCellId; i < endCellId; i++) { if (LOG.isTraceEnabled()) { - LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, i); + LOG.trace("{} [{}]: marking cell {}", serverIndex, regionIndex, i); } cellsPresent[i] = true; } if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { if (LOG.isTraceEnabled()) { - LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, endKey); + LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, endCellId); } cellsPresent[endCellId] = true; } From 26b4f210f11b117e7f552e3c8dbe46c012e95f85 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 19:27:16 -0500 Subject: [PATCH 057/126] Undo memoization on cluster state change, and allow to trace teh balancer exploration --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 5 +++++ .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index b4962132bede..3f4884857f53 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -96,6 +96,11 @@ void prepare(BalancerClusterState cluster) { return cluster.tables.stream().anyMatch(name -> name.contains("objects-3")); } + @Override protected void regionMoved(int region, int oldServer, int newServer) { + super.regionMoved(region, oldServer, newServer); + this.isCostUpToDate.set(false); + } + private String snapshotState() { StringBuilder stateString = new StringBuilder(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index a2db3a8e1f37..34893ef60890 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -581,6 +581,11 @@ protected List balanceTable(TableName tableName, newCost = computeCost(cluster, currentCost); + if(LOG.isTraceEnabled()) { + LOG.trace("[{}] Step: {} newCost: {} currentCost: {} action: {}", + tableName.getNameWithNamespaceInclAsString(), step, newCost, currentCost, action); + } + // Should this be kept? if (newCost < currentCost) { currentCost = newCost; From bbed1886107316389ed39ff035288ea51e2af1f3 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 19:29:08 -0500 Subject: [PATCH 058/126] Also emit the full cost breakdown per step --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 34893ef60890..c9b4d71240ec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -582,8 +582,8 @@ protected List balanceTable(TableName tableName, newCost = computeCost(cluster, currentCost); if(LOG.isTraceEnabled()) { - LOG.trace("[{}] Step: {} newCost: {} currentCost: {} action: {}", - tableName.getNameWithNamespaceInclAsString(), step, newCost, currentCost, action); + LOG.trace("S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); } // Should this be kept? From dbe3263f3339933c6c56ba11dfb6a242f31373c2 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 20:12:58 -0500 Subject: [PATCH 059/126] Rework the cost function to be the number of cells (over all servers) above the best possible isolation --- .../master/balancer/HubSpotCellCostFunction.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 3f4884857f53..6a5fafb4da0c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -226,7 +226,7 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions[i].getEndKey(), numCells); } - int[] cellsPerServer = new int[numServers]; + int cost = 0; for (int i = 0; i < numServers; i++) { int cellsOnThisServer = 0; for (int j = 0; j < numCells; j++) { @@ -235,17 +235,10 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] } } - cellsPerServer[i] = cellsOnThisServer; + cost += Math.max(cellsOnThisServer - bestCaseMaxCellsPerServer, 0); } - Map stats = Quantiles.scale(100) - .indexes(0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(cellsPerServer); - - AtomicDouble totalCost = new AtomicDouble(0.0); - stats.forEach((percentile, value) -> totalCost.addAndGet(value)); - - return (int) Math - .round(Math.max(0, totalCost.get() / stats.size() - bestCaseMaxCellsPerServer)); + return cost; } private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, byte[] endKey, From 41a0b41f0b62b421aba5db77db8e56f6dff81d43 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 21:12:54 -0500 Subject: [PATCH 060/126] Update debug to focus on which region/cells are getting picked --- .../HubSpotCellBasedCandidateGenerator.java | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 20bf42931ccd..20bc9e7f889d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -82,13 +82,7 @@ } } - BalanceAction action = maybeMoveRegion(cluster, serverWithMostCells); - - if (LOG.isDebugEnabled() && action.getType() != BalanceAction.Type.NULL) { - LOG.debug("Attempting {} ({} cells max)", action.toString(), mostCellsPerServerSoFar); - } - - return action; + return maybeMoveRegion(cluster, serverWithMostCells); } private int numCells(BalancerClusterState cluster, int serverIndex, int[] regionsForServer) { @@ -114,10 +108,6 @@ private int numCells(BalancerClusterState cluster, int serverIndex, int[] region byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - if (LOG.isTraceEnabled()) { - LOG.trace("{} [{}]: eval {} - {}", serverIndex, regionIndex, Bytes.toHex(startKey), Bytes.toHex(endKey)); - } - short startCellId = (startKey == null || startKey.length == 0) ? 0 : (startKey.length >= 2 ? @@ -137,21 +127,11 @@ private int numCells(BalancerClusterState cluster, int serverIndex, int[] region endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; } - if (LOG.isTraceEnabled()) { - LOG.trace("Evaluating {} [{}]: cells {} - {}", serverIndex, regionIndex, startCellId, endCellId); - } - for (short i = startCellId; i < endCellId; i++) { - if (LOG.isTraceEnabled()) { - LOG.trace("{} [{}]: marking cell {}", serverIndex, regionIndex, i); - } cellsPresent[i] = true; } if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { - if (LOG.isTraceEnabled()) { - LOG.trace("{}[{}]: marking cell {}", serverIndex, regionIndex, endCellId); - } cellsPresent[endCellId] = true; } } @@ -206,7 +186,33 @@ BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCe regionWithFewestInstancesOfCellsPresent); if (LOG.isTraceEnabled()) { - LOG.trace("Moving s{}.r{} to {}", serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer); + Multimap cellsByRegionOnTarget = + computeCellsByRegion(cluster.regionsPerServer[targetServer], cluster.regions); + + Set currentCellsOnSource = new HashSet<>(cellsByRegionOnSource.values()); + Set currentCellsOnTarget = new HashSet<>(cellsByRegionOnTarget.values()); + + Set afterMoveCellsOnSource = cellsByRegionOnSource.keySet().stream() + .filter(region -> region != regionWithFewestInstancesOfCellsPresent) + .flatMap(region -> cellsByRegionOnSource.get(region).stream()) + .collect(Collectors.toSet()); + Set afterMoveCellsOnTarget = new HashSet<>(currentCellsOnTarget); + afterMoveCellsOnTarget.addAll( + cellsByRegionOnSource.get(regionWithFewestInstancesOfCellsPresent)); + + boolean sourceImproves = afterMoveCellsOnSource.size() < currentCellsOnSource.size(); + boolean targetStaysSame = afterMoveCellsOnTarget.size() == currentCellsOnTarget.size(); + + LOG.trace("Moving s{}.r{} to {}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", + serverWithMostCells, + regionWithFewestInstancesOfCellsPresent, + targetServer, + currentCellsOnSource.size(), + afterMoveCellsOnSource.size(), + currentCellsOnTarget.size(), + afterMoveCellsOnTarget.size(), + (sourceImproves && targetStaysSame) ? "GOOD" : ((sourceImproves) ? "NEUTRAL" : "BAD") + ); } return getAction(serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer, -1); From 1873181f454184eefbf31e738eaf103daa4c7d06 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 7 Nov 2024 21:54:59 -0500 Subject: [PATCH 061/126] Tweak down to trace --- .../HubSpotCellBasedCandidateGenerator.java | 4 +- .../balancer/HubSpotCellCostFunction.java | 39 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 20bc9e7f889d..c0d0a53257ae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -185,7 +185,7 @@ BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCe int targetServer = computeBestServerToReceiveRegion(cluster, serverWithMostCells, regionWithFewestInstancesOfCellsPresent); - if (LOG.isTraceEnabled()) { + if (LOG.isDebugEnabled()) { Multimap cellsByRegionOnTarget = computeCellsByRegion(cluster.regionsPerServer[targetServer], cluster.regions); @@ -203,7 +203,7 @@ BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCe boolean sourceImproves = afterMoveCellsOnSource.size() < currentCellsOnSource.size(); boolean targetStaysSame = afterMoveCellsOnTarget.size() == currentCellsOnTarget.size(); - LOG.trace("Moving s{}.r{} to {}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", + LOG.debug("Moving s{}.r{} to {}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 6a5fafb4da0c..e380a86ee89a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.Arrays; -import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; @@ -31,15 +30,12 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; -import org.apache.hbase.thirdparty.com.google.common.util.concurrent.AtomicDouble; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; +import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; -import org.apache.hbase.thirdparty.com.google.common.math.Quantiles; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; /** @@ -87,8 +83,8 @@ void prepare(BalancerClusterState cluster) { this.isCostUpToDate.set(false); this.memoizedCostSupplier = Suppliers.memoize(() -> 0.0); - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing {}", snapshotState()); + if (LOG.isTraceEnabled()) { + LOG.trace("Initializing {}", snapshotState()); } } @@ -168,9 +164,9 @@ protected double cost() { if ( regions != null && regions.length > 0 - && regions[0].getTable().getNamespaceAsString().equals("default") && LOG.isDebugEnabled() + && regions[0].getTable().getNamespaceAsString().equals("default") && LOG.isTraceEnabled() ) { - LOG.debug("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); + LOG.trace("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); } return cost; @@ -182,17 +178,17 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); - if (LOG.isDebugEnabled()) { + if (LOG.isTraceEnabled()) { Set tableAndNamespace = Arrays.stream(regions).map(RegionInfo::getTable) .map(table -> table.getNameAsString() + "." + table.getNamespaceAsString()) .collect(Collectors.toSet()); - LOG.debug("Calculating current cell cost for {} regions from these tables {}", regions.length, + LOG.trace("Calculating current cell cost for {} regions from these tables {}", regions.length, tableAndNamespace); } if (regions.length > 0 && !regions[0].getTable().getNamespaceAsString().equals("default")) { - if (LOG.isDebugEnabled()) { - LOG.debug("Skipping cost calculation for non-default namespace on {}", + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping cost calculation for non-default namespace on {}", regions[0].getTable().getNameWithNamespaceInclAsString()); } return 0; @@ -227,15 +223,26 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] } int cost = 0; - for (int i = 0; i < numServers; i++) { + StringBuilder debugBuilder = new StringBuilder().append("["); + for (int server = 0; server < numServers; server++) { int cellsOnThisServer = 0; for (int j = 0; j < numCells; j++) { - if (serverHasCell[i][j]) { + if (serverHasCell[server][j]) { cellsOnThisServer++; } } - cost += Math.max(cellsOnThisServer - bestCaseMaxCellsPerServer, 0); + int costForThisServer = Math.max(cellsOnThisServer - bestCaseMaxCellsPerServer, 0); + if (LOG.isDebugEnabled()) { + debugBuilder.append(server).append("=").append(costForThisServer).append(", "); + } + cost += costForThisServer; + } + + debugBuilder.append("]"); + + if (LOG.isDebugEnabled()) { + LOG.debug("Cost {} from {}", cost, debugBuilder); } return cost; From adf28a6ebb6e9dfbc42e8208d2c4dd62fa9488bd Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 8 Nov 2024 09:07:32 -0500 Subject: [PATCH 062/126] Rework how the cost function calculates and updates cost --- .../balancer/HubSpotCellCostFunction.java | 141 ++++++++++++------ .../balancer/TestHubSpotCellCostFunction.java | 37 +++-- 2 files changed, 126 insertions(+), 52 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index e380a86ee89a..1c0da350cc40 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -18,11 +18,10 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.Arrays; +import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.conf.Configuration; @@ -34,7 +33,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; -import org.apache.hbase.thirdparty.com.google.common.base.Suppliers; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; @@ -53,22 +53,19 @@ public class HubSpotCellCostFunction extends CostFunction { private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; - private static final byte PAD_START_KEY = 0; - private static final byte PAD_END_KEY = -1; - - private final AtomicBoolean isCostUpToDate; private int numServers; private short numCells; private ServerName[] servers; - private RegionInfo[] regions; // not necessarily sorted + private RegionInfo[] regions; private int[][] regionLocations; - private Supplier memoizedCostSupplier; + private boolean[][] serverHasCell; + private int bestCaseMaxCellsPerServer; + private int numRegionCellsOverassigned; HubSpotCellCostFunction(Configuration conf) { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); - this.isCostUpToDate = new AtomicBoolean(false); } @Override @@ -80,11 +77,24 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - this.isCostUpToDate.set(false); - this.memoizedCostSupplier = Suppliers.memoize(() -> 0.0); - - if (LOG.isTraceEnabled()) { - LOG.trace("Initializing {}", snapshotState()); + this.serverHasCell = new boolean[numServers][numCells]; + this.bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); + this.numRegionCellsOverassigned = + calculateCurrentCellCost( + numCells, + numServers, + bestCaseMaxCellsPerServer, + regions, + regionLocations, + serverHasCell, + super.cluster::getRegionSizeMB + ); + + if (regions.length > 0 + && regions[0].getTable().getNamespaceAsString().equals("default") + && LOG.isTraceEnabled() + ) { + LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), snapshotState()); } } @@ -93,14 +103,66 @@ void prepare(BalancerClusterState cluster) { } @Override protected void regionMoved(int region, int oldServer, int newServer) { - super.regionMoved(region, oldServer, newServer); - this.isCostUpToDate.set(false); + RegionInfo movingRegion = regions[region]; + + if (!movingRegion.getTable().getNamespaceAsString().equals("default")) { + return; + } + + Set cellsOnRegion = toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + Map numRegionsForCellOnOldServer = computeCellFrequencyForServer(oldServer); + Map numRegionsForCellOnNewServer = computeCellFrequencyForServer(newServer); + + int currentCellCountOldServer = numRegionsForCellOnOldServer.keySet().size(); + int currentCellCountNewServer = numRegionsForCellOnNewServer.keySet().size(); + + int changeInOverassignedRegionCells = 0; + for (short movingCell : cellsOnRegion) { + int oldServerCellCount = numRegionsForCellOnOldServer.get(movingCell); + int newServerCellCount = numRegionsForCellOnNewServer.get(movingCell); + + if (oldServerCellCount == 1) { + if (currentCellCountOldServer > bestCaseMaxCellsPerServer) { + changeInOverassignedRegionCells--; + } + serverHasCell[oldServer][movingCell] = false; + } + + if (newServerCellCount == 0) { + if (currentCellCountNewServer > bestCaseMaxCellsPerServer) { + changeInOverassignedRegionCells++; + } + serverHasCell[newServer][movingCell] = true; + } + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, changeInOverassignedRegionCells); + } + + numRegionCellsOverassigned += changeInOverassignedRegionCells; + } + + private Map computeCellFrequencyForServer(int server) { + int[] regions = cluster.regionsPerServer[server]; + ImmutableMultimap.Builder regionsByCell = ImmutableMultimap.builder(); + for (int regionIndex : regions) { + RegionInfo region = cluster.regions[regionIndex]; + Set cellsInRegion = toCells(region.getStartKey(), region.getEndKey(), numCells); + cellsInRegion.forEach(cell -> regionsByCell.put(cell, regionIndex)); + } + + return regionsByCell.build() + .asMap() + .entrySet() + .stream() + .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().size())); } private String snapshotState() { StringBuilder stateString = new StringBuilder(); - stateString.append("HubSpotCellCostFunction[0] config for ") + stateString.append("HubSpotCellCostFunction config for ") .append(Optional.ofNullable(regions[0]).map(RegionInfo::getTable) .map(TableName::getNameWithNamespaceInclAsString).orElseGet(() -> "N/A")) .append(":").append("\n\tnumServers=").append(numServers).append("\n\tnumCells=") @@ -153,28 +215,19 @@ private static String toCellSetString(Set cells) { @Override protected double cost() { - if (isCostUpToDate.get()) { - return memoizedCostSupplier.get(); - } - - double cost = calculateCurrentCellCost(numCells, numServers, regions, regionLocations, super.cluster::getRegionSizeMB); - - this.memoizedCostSupplier = Suppliers.memoize(() -> cost); - this.isCostUpToDate.set(true); - - if ( - regions != null && regions.length > 0 - && regions[0].getTable().getNamespaceAsString().equals("default") && LOG.isTraceEnabled() - ) { - LOG.trace("Evaluated (cost={}) {}", String.format("%.2f", cost), snapshotState()); - } - - return cost; + return numRegionCellsOverassigned; } - static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] regions, - int[][] regionLocations, Function getRegionSizeMbFunc) { - int bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); + static int calculateCurrentCellCost( + short numCells, + int numServers, + int bestCaseMaxCellsPerServer, + RegionInfo[] regions, + int[][] regionLocations, + boolean[][] serverHasCell, + Function getRegionSizeMbFunc + ) { + Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); @@ -194,7 +247,6 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] return 0; } - boolean[][] serverHasCell = new boolean[numServers][numCells]; for (int i = 0; i < regions.length; i++) { if (regions[i] == null) { throw new IllegalStateException("No region available at index " + i); @@ -239,17 +291,20 @@ static int calculateCurrentCellCost(short numCells, int numServers, RegionInfo[] cost += costForThisServer; } - debugBuilder.append("]"); - if (LOG.isDebugEnabled()) { + debugBuilder.append("]"); LOG.debug("Cost {} from {}", cost, debugBuilder); } return cost; } - private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, byte[] endKey, - short numCells) { + private static void setCellsForServer( + boolean[] serverHasCell, + byte[] startKey, + byte[] endKey, + short numCells + ) { short startCellId = (startKey == null || startKey.length == 0) ? 0 : (startKey.length >= 2 diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index 1d3d95a954d2..73a5a3a43a2e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -105,21 +105,40 @@ public void testCellCountBothEndsNull() { @Test public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }, ALL_REGIONS_SIZE_1_MB); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost + ((short) 4, + 4, + 1, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }, + new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, + ALL_REGIONS_SIZE_1_MB + ); assertEquals(0, cost); } @Test public void testCostImbalanced() { - // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }, ALL_REGIONS_SIZE_1_MB); + // 4 cells, 4 servers, imbalanced + int cost = HubSpotCellCostFunction.calculateCurrentCellCost( + (short) 4, + 4, + 1, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }, + new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, + ALL_REGIONS_SIZE_1_MB); assertTrue(cost > 0); } From 06e6b83aad7cd74256a9a200a57583e60a92240d Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 8 Nov 2024 09:13:01 -0500 Subject: [PATCH 063/126] Fix edge case for short rowkeys --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 1c0da350cc40..ee40dc1ec14b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -336,7 +336,7 @@ private static void setCellsForServer( } static boolean isStopExclusive(byte[] endKey) { - return endKey != null && endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2); + return endKey != null && endKey.length == 2 || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); } static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { From 4f61691190d4c12c70979d69f3f2188f6bd86f06 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 8 Nov 2024 10:05:02 -0500 Subject: [PATCH 064/126] Add debug and fix the state error here --- .../balancer/HubSpotCellCostFunction.java | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index ee40dc1ec14b..e6e6c1c08a62 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -110,15 +110,38 @@ void prepare(BalancerClusterState cluster) { } Set cellsOnRegion = toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + + if (LOG.isDebugEnabled()) { + LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", + region, + Bytes.toHex(movingRegion.getStartKey()), + Bytes.toHex(movingRegion.getEndKey()), + cellsOnRegion + ); + } + Map numRegionsForCellOnOldServer = computeCellFrequencyForServer(oldServer); Map numRegionsForCellOnNewServer = computeCellFrequencyForServer(newServer); int currentCellCountOldServer = numRegionsForCellOnOldServer.keySet().size(); int currentCellCountNewServer = numRegionsForCellOnNewServer.keySet().size(); + if (LOG.isDebugEnabled()) { + LOG.debug( + "Old server {} [{}] has cell frequency of {}.\n\nNew server {} [{}] has cell frequency of {}.", + oldServer, + currentCellCountOldServer, + numRegionsForCellOnOldServer, + newServer, + currentCellCountNewServer, + numRegionsForCellOnNewServer + ); + } + int changeInOverassignedRegionCells = 0; for (short movingCell : cellsOnRegion) { - int oldServerCellCount = numRegionsForCellOnOldServer.get(movingCell); + // this is invoked AFTER the region has been moved + int oldServerCellCount = numRegionsForCellOnOldServer.getOrDefault(movingCell, 0) + 1; int newServerCellCount = numRegionsForCellOnNewServer.get(movingCell); if (oldServerCellCount == 1) { From c6a84ec8daf150db970c644dfff8abb4c1e8cade Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 8 Nov 2024 17:06:28 -0500 Subject: [PATCH 065/126] No noop --- .../balancer/HubSpotCellBasedCandidateGenerator.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index c0d0a53257ae..2c3d814ad399 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -38,7 +38,6 @@ private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); - private static final double CHANCE_OF_NOOP = 0.2; @Override BalanceAction generate(BalancerClusterState cluster) { if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { @@ -161,13 +160,6 @@ BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCe return BalanceAction.NULL_ACTION; } - if (ThreadLocalRandom.current().nextFloat() < CHANCE_OF_NOOP) { - if (LOG.isTraceEnabled()) { - LOG.trace("Randomly taking no action. Chaos! Mwahahaha!"); - } - return BalanceAction.NULL_ACTION; - } - Multimap cellsByRegionOnSource = computeCellsByRegion(cluster.regionsPerServer[serverWithMostCells], cluster.regions); Map countOfRegionsForCellOnSource = new HashMap<>(); From f40e83b02e67ba3f46cf971e5ae550eb08d025d2 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 8 Nov 2024 17:14:45 -0500 Subject: [PATCH 066/126] Print which generator we've selected --- .../master/balancer/StochasticLoadBalancer.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index c9b4d71240ec..224b43f3e0d5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -453,6 +453,9 @@ protected CandidateGenerator getRandomGenerator() { weightsOfGenerators[i] = sum; } if (sum == 0) { + if (LOG.isDebugEnabled()) { + LOG.debug("Using {}", candidateGenerators.get(0).getClass().getSimpleName()); + } return candidateGenerators.get(0); } for (int i = 0; i < weightsOfGenerators.length; i++) { @@ -461,10 +464,19 @@ protected CandidateGenerator getRandomGenerator() { double rand = ThreadLocalRandom.current().nextDouble(); for (int i = 0; i < weightsOfGenerators.length; i++) { if (rand <= weightsOfGenerators[i]) { - return candidateGenerators.get(i); + CandidateGenerator generator = candidateGenerators.get(i); + if (LOG.isDebugEnabled()) { + LOG.debug("Using {}", generator.getClass().getSimpleName()); + } + return generator; } } - return candidateGenerators.get(candidateGenerators.size() - 1); + + CandidateGenerator generator = candidateGenerators.get(candidateGenerators.size() - 1); + if (LOG.isDebugEnabled()) { + LOG.debug("Using {}", generator.getClass().getSimpleName()); + } + return generator; } @RestrictedApi(explanation = "Should only be called in tests", link = "", From 7011733d64b2f52ebe8a31f2c06d2c2f4f4d288b Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 12:38:09 -0500 Subject: [PATCH 067/126] Tweak logs to allow for local run --- .../master/balancer/HubSpotCellCostFunction.java | 14 ++++++++++++++ .../master/balancer/StochasticLoadBalancer.java | 15 ++------------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index e6e6c1c08a62..20fe8088439a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hbase.master.balancer; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -24,6 +26,7 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; +import javax.ws.rs.core.MediaType; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; @@ -50,6 +53,9 @@ public class HubSpotCellCostFunction extends CostFunction { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; + private static final ObjectMapper OBJECT_MAPPER = new JacksonJaxbJsonProvider().locateMapper( + BalancerClusterState.class, + MediaType.APPLICATION_JSON_TYPE); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; @@ -77,6 +83,14 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); + if (LOG.isTraceEnabled()) { + try { + LOG.trace("Cluster state:\n{}", OBJECT_MAPPER.writeValueAsString(cluster)); + } catch (Exception ex) { + LOG.error("Failed to write cluster state", ex); + } + } + this.serverHasCell = new boolean[numServers][numCells]; this.bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); this.numRegionCellsOverassigned = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 224b43f3e0d5..c7f872fc0844 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -453,9 +453,6 @@ protected CandidateGenerator getRandomGenerator() { weightsOfGenerators[i] = sum; } if (sum == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug("Using {}", candidateGenerators.get(0).getClass().getSimpleName()); - } return candidateGenerators.get(0); } for (int i = 0; i < weightsOfGenerators.length; i++) { @@ -464,19 +461,11 @@ protected CandidateGenerator getRandomGenerator() { double rand = ThreadLocalRandom.current().nextDouble(); for (int i = 0; i < weightsOfGenerators.length; i++) { if (rand <= weightsOfGenerators[i]) { - CandidateGenerator generator = candidateGenerators.get(i); - if (LOG.isDebugEnabled()) { - LOG.debug("Using {}", generator.getClass().getSimpleName()); - } - return generator; + return candidateGenerators.get(i); } } - CandidateGenerator generator = candidateGenerators.get(candidateGenerators.size() - 1); - if (LOG.isDebugEnabled()) { - LOG.debug("Using {}", generator.getClass().getSimpleName()); - } - return generator; + return candidateGenerators.get(candidateGenerators.size() - 1); } @RestrictedApi(explanation = "Should only be called in tests", link = "", From bd8360230bd7b937e3809625505c55f59dcc25bb Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 12:50:10 -0500 Subject: [PATCH 068/126] use shaded version --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 20fe8088439a..139ab5bb1c3d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -26,12 +24,12 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; -import javax.ws.rs.core.MediaType; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.htrace.shaded.fasterxml.jackson.databind.ObjectMapper; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,9 +51,7 @@ public class HubSpotCellCostFunction extends CostFunction { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - private static final ObjectMapper OBJECT_MAPPER = new JacksonJaxbJsonProvider().locateMapper( - BalancerClusterState.class, - MediaType.APPLICATION_JSON_TYPE); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; From 5d69edc6c41b9e59103704997cd19e1b07bb16cc Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 12:56:50 -0500 Subject: [PATCH 069/126] Use gson --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 139ab5bb1c3d..d32f061121fc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.htrace.shaded.fasterxml.jackson.databind.ObjectMapper; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,6 +37,7 @@ import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; +import org.apache.hbase.thirdparty.com.google.gson.Gson; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This @@ -51,7 +51,7 @@ public class HubSpotCellCostFunction extends CostFunction { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final Gson OBJECT_MAPPER = new Gson(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; @@ -81,7 +81,7 @@ void prepare(BalancerClusterState cluster) { if (LOG.isTraceEnabled()) { try { - LOG.trace("Cluster state:\n{}", OBJECT_MAPPER.writeValueAsString(cluster)); + LOG.trace("Cluster state:\n{}", OBJECT_MAPPER.toJson(cluster)); } catch (Exception ex) { LOG.error("Failed to write cluster state", ex); } From d8cef32fa830f171ee2cf9e0113e4d308461f055 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 17:05:08 -0500 Subject: [PATCH 070/126] Try exposing only specific fields --- .../org/apache/hadoop/hbase/ServerName.java | 7 +- .../master/balancer/BalancerClusterState.java | 97 ++++++++++--------- .../balancer/HubSpotCellCostFunction.java | 5 +- 3 files changed, 57 insertions(+), 52 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 5223bac3e5b1..9f32e64b9ff4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; @@ -82,15 +83,15 @@ public class ServerName implements Comparable, Serializable { */ public static final String UNKNOWN_SERVERNAME = "#unknown#"; - private final String serverName; - private final long startCode; + @Expose private final String serverName; + @Expose private final long startCode; private transient Address address; /** * Cached versioned bytes of this ServerName instance. * @see #getVersionedBytes() */ - private byte[] bytes; + @Expose private byte[] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList<>(0); /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 8507a13a8128..de1cb5793017 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,68 +53,68 @@ class BalancerClusterState { private static final Logger LOG = LoggerFactory.getLogger(BalancerClusterState.class); - ServerName[] servers; + @Expose ServerName[] servers; // ServerName uniquely identifies a region server. multiple RS can run on the same host - String[] hosts; - String[] racks; - boolean multiServersPerHost = false; // whether or not any host has more than one server + @Expose String[] hosts; + @Expose String[] racks; + @Expose boolean multiServersPerHost = false; // whether or not any host has more than one server - ArrayList tables; - RegionInfo[] regions; - Deque[] regionLoads; + @Expose ArrayList tables; + @Expose RegionInfo[] regions; + @Expose Deque[] regionLoads; private RegionLocationFinder regionFinder; - int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality + @Expose int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality - int[] serverIndexToHostIndex; // serverIndex -> host index - int[] serverIndexToRackIndex; // serverIndex -> rack index + @Expose int[] serverIndexToHostIndex; // serverIndex -> host index + @Expose int[] serverIndexToRackIndex; // serverIndex -> rack index - int[][] regionsPerServer; // serverIndex -> region list - int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list - int[][] regionsPerHost; // hostIndex -> list of regions - int[][] regionsPerRack; // rackIndex -> region list - Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated + @Expose int[][] regionsPerServer; // serverIndex -> region list + @Expose int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list + @Expose int[][] regionsPerHost; // hostIndex -> list of regions + @Expose int[][] regionsPerRack; // rackIndex -> region list + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated // replicas by primary region index - Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by // primary region index - Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by // primary region index - int[][] serversPerHost; // hostIndex -> list of server indexes - int[][] serversPerRack; // rackIndex -> list of server indexes - int[] regionIndexToServerIndex; // regionIndex -> serverIndex - int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) - int[] regionIndexToTableIndex; // regionIndex -> tableIndex - int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions - int[] numRegionsPerTable; // tableIndex -> region count - int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS - int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary - boolean hasRegionReplicas = false; // whether there is regions with replicas - - Integer[] serverIndicesSortedByRegionCount; - Integer[] serverIndicesSortedByLocality; - - Map serversToIndex; - Map hostsToIndex; - Map racksToIndex; - Map tablesToIndex; - Map regionsToIndex; - float[] localityPerServer; - - int numServers; - int numHosts; - int numRacks; - int numTables; - int numRegions; - - int numMovedRegions = 0; // num moved regions from the initial configuration - Map> clusterState; + @Expose int[][] serversPerHost; // hostIndex -> list of server indexes + @Expose int[][] serversPerRack; // rackIndex -> list of server indexes + @Expose int[] regionIndexToServerIndex; // regionIndex -> serverIndex + @Expose int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) + @Expose int[] regionIndexToTableIndex; // regionIndex -> tableIndex + @Expose int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions + @Expose int[] numRegionsPerTable; // tableIndex -> region count + @Expose int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + @Expose int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary + @Expose boolean hasRegionReplicas = false; // whether there is regions with replicas + + @Expose Integer[] serverIndicesSortedByRegionCount; + @Expose Integer[] serverIndicesSortedByLocality; + + @Expose Map serversToIndex; + @Expose Map hostsToIndex; + @Expose Map racksToIndex; + @Expose Map tablesToIndex; + @Expose Map regionsToIndex; + @Expose float[] localityPerServer; + + @Expose int numServers; + @Expose int numHosts; + @Expose int numRacks; + @Expose int numTables; + @Expose int numRegions; + + @Expose int numMovedRegions = 0; // num moved regions from the initial configuration + @Expose Map> clusterState; private final RackManager rackManager; // Maps region -> rackIndex -> locality of region on rack - private float[][] rackLocalities; + @Expose private float[][] rackLocalities; // Maps localityType -> region -> [server|rack]Index with highest locality - private int[][] regionsToMostLocalEntities; + @Expose private int[][] regionsToMostLocalEntities; static class DefaultRackManager extends RackManager { @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index d32f061121fc..9a3a3c9480b0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -38,6 +38,7 @@ import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; import org.apache.hbase.thirdparty.com.google.gson.Gson; +import org.apache.hbase.thirdparty.com.google.gson.GsonBuilder; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This @@ -51,7 +52,9 @@ public class HubSpotCellCostFunction extends CostFunction { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - private static final Gson OBJECT_MAPPER = new Gson(); + private static final Gson OBJECT_MAPPER = new GsonBuilder() + .excludeFieldsWithoutExposeAnnotation() + .create(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; From e1f1da1a3df11aada6a7f20386e57cf5ea85c43a Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 18:31:40 -0500 Subject: [PATCH 071/126] Only emit objects-3, and include the full region info --- .../hbase/client/MutableRegionInfo.java | 23 ++++++++++--------- .../org/apache/hadoop/hbase/TableName.java | 17 +++++++------- .../balancer/HubSpotCellCostFunction.java | 4 ++-- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index a9382f3a9bed..81e6d478b79d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,17 +55,17 @@ class MutableRegionInfo implements RegionInfo { // zookeeper as of 0.90.0 HBase. And now in DisableTableProcedure, finally we will create bunch // of UnassignProcedures and at the last of the procedure we will set the region state to // CLOSED, and will not change the offLine flag. - private boolean offLine; - private boolean split; - private final long regionId; - private final int replicaId; - private final byte[] regionName; - private final byte[] startKey; - private final byte[] endKey; - private final int hashCode; - private final String encodedName; - private final byte[] encodedNameAsBytes; - private final TableName tableName; + @Expose private boolean offLine; + @Expose private boolean split; + @Expose private final long regionId; + @Expose private final int replicaId; + @Expose private final byte[] regionName; + @Expose private final byte[] startKey; + @Expose private final byte[] endKey; + @Expose private final int hashCode; + @Expose private final String encodedName; + @Expose private final byte[] encodedNameAsBytes; + @Expose private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, final byte[] endKey, final long regionId, final int replicaId, boolean offLine, diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index c799fb9b2f78..0b077b724786 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -24,6 +24,7 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; @@ -93,14 +94,14 @@ public static boolean isMetaTableName(final TableName tn) { */ public static final TableName OLD_META_TABLE_NAME = getADummyTableName(OLD_META_STR); - private final byte[] name; - private final String nameAsString; - private final byte[] namespace; - private final String namespaceAsString; - private final byte[] qualifier; - private final String qualifierAsString; - private final boolean systemTable; - private final int hashCode; + @Expose private final byte[] name; + @Expose private final String nameAsString; + @Expose private final byte[] namespace; + @Expose private final String namespaceAsString; + @Expose private final byte[] qualifier; + @Expose private final String qualifierAsString; + @Expose private final boolean systemTable; + @Expose private final int hashCode; /** * Check passed byte array, "tableName", is legal user-space table name. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 9a3a3c9480b0..3e88c38cc908 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -82,9 +82,9 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - if (LOG.isTraceEnabled()) { + if (LOG.isTraceEnabled() && cluster.tables.contains("objects-3")) { try { - LOG.trace("Cluster state:\n{}", OBJECT_MAPPER.toJson(cluster)); + LOG.trace("{} cluster state:\n{}", cluster.tables, OBJECT_MAPPER.toJson(cluster)); } catch (Exception ex) { LOG.error("Failed to write cluster state", ex); } From eb524460bce205e92dd6ca25724cd7c55e975b46 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 19:22:13 -0500 Subject: [PATCH 072/126] Mark as exposed --- .../src/main/java/org/apache/hadoop/hbase/net/Address.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index ef3520b31c78..5b35bfbd0edb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -21,6 +21,7 @@ import java.util.Iterator; import java.util.List; import org.apache.commons.lang3.StringUtils; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; @@ -37,7 +38,7 @@ */ @InterfaceAudience.Public public class Address implements Comparable
{ - private final HostAndPort hostAndPort; + @Expose private final HostAndPort hostAndPort; private Address(HostAndPort hostAndPort) { this.hostAndPort = hostAndPort; From eca1dfec7014dae5aff06b947652ab2b8cc1ef53 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 18 Nov 2024 19:44:35 -0500 Subject: [PATCH 073/126] Refine when we print, and what --- .../balancer/HubSpotCellCostFunction.java | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 3e88c38cc908..a89f274b5356 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -37,8 +38,14 @@ import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; +import org.apache.hbase.thirdparty.com.google.gson.ExclusionStrategy; +import org.apache.hbase.thirdparty.com.google.gson.FieldAttributes; import org.apache.hbase.thirdparty.com.google.gson.Gson; import org.apache.hbase.thirdparty.com.google.gson.GsonBuilder; +import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializationContext; +import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializer; +import org.apache.hbase.thirdparty.com.google.gson.JsonElement; +import org.apache.hbase.thirdparty.com.google.gson.JsonParseException; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This @@ -54,6 +61,25 @@ public class HubSpotCellCostFunction extends CostFunction { "hbase.master.balancer.stochastic.hubspotCellCost"; private static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation() + .enableComplexMapKeySerialization() + .registerTypeAdapter(RegionInfo.class, new JsonDeserializer(){ + @Override public Object deserialize(JsonElement json, Type typeOfT, + JsonDeserializationContext context) throws JsonParseException { + return null; + } + }) + .addDeserializationExclusionStrategy(new ExclusionStrategy() { + @Override public boolean shouldSkipField(FieldAttributes f) { + return f.getName().equals("serversToIndex") + || f.getName().equals("regionsToIndex") + || f.getName().equals("clusterState") + ; + } + + @Override public boolean shouldSkipClass(Class clazz) { + return false; + } + }) .create(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now @@ -82,7 +108,11 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - if (LOG.isTraceEnabled() && cluster.tables.contains("objects-3")) { + if (LOG.isTraceEnabled() + && cluster.tables.contains("objects-3") + && cluster.regions != null + && cluster.regions.length > 0 + ) { try { LOG.trace("{} cluster state:\n{}", cluster.tables, OBJECT_MAPPER.toJson(cluster)); } catch (Exception ex) { From d68f91a6eadef06b7416f8ef033db75673e2a839 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 19 Nov 2024 10:33:41 -0500 Subject: [PATCH 074/126] Update serde for int2int map so we can run the balancer locally --- .../HubSpotCellBasedCandidateGenerator.java | 1 + .../balancer/HubSpotCellCostFunction.java | 121 +++++++++++++++++- 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 2c3d814ad399..8bd0813d8500 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -70,6 +70,7 @@ // we don't know how many servers have the same cell count, so use a simplified online // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) if (cellsOnServer > mostCellsPerServerSoFar) { + serverWithMostCells = serverIndex; mostCellsPerServerSoFar = cellsOnServer; mostCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); } else if (cellsOnServer == mostCellsPerServerSoFar) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index a89f274b5356..e39a9d398404 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.lang.reflect.Field; import java.lang.reflect.Type; import java.util.Arrays; import java.util.Map; @@ -25,10 +26,12 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -42,10 +45,14 @@ import org.apache.hbase.thirdparty.com.google.gson.FieldAttributes; import org.apache.hbase.thirdparty.com.google.gson.Gson; import org.apache.hbase.thirdparty.com.google.gson.GsonBuilder; +import org.apache.hbase.thirdparty.com.google.gson.JsonArray; import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializationContext; import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializer; import org.apache.hbase.thirdparty.com.google.gson.JsonElement; +import org.apache.hbase.thirdparty.com.google.gson.JsonObject; import org.apache.hbase.thirdparty.com.google.gson.JsonParseException; +import org.apache.hbase.thirdparty.com.google.gson.JsonSerializationContext; +import org.apache.hbase.thirdparty.com.google.gson.JsonSerializer; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This @@ -59,14 +66,118 @@ public class HubSpotCellCostFunction extends CostFunction { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellCostFunction.class); private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - private static final Gson OBJECT_MAPPER = new GsonBuilder() + + static class Int2IntCounterMapAdapter implements JsonSerializer, JsonDeserializer { + @Override public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, + JsonSerializationContext context) { + JsonObject obj = new JsonObject(); + + obj.addProperty("loadFactor", src.loadFactor()); + obj.addProperty("initialValue", src.initialValue()); + obj.addProperty("resizeThreshold", src.resizeThreshold()); + obj.addProperty("size", src.size()); + + Field entryField = null; + try { + entryField = Int2IntCounterMap.class.getDeclaredField("entries"); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + entryField.setAccessible(true); + int[] entries = null; + try { + entries = (int[]) entryField.get(src); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + JsonArray entryArray = new JsonArray(entries.length); + for (int entry : entries) { + entryArray.add(entry); + } + obj.add("entries", entryArray); + + return obj; + } + + @Override public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, + JsonDeserializationContext context) throws JsonParseException { + JsonObject obj = json.getAsJsonObject(); + + float loadFactor = obj.get("loadFactor").getAsFloat(); + int initialValue = obj.get("initialValue").getAsInt(); + int resizeThreshold = obj.get("resizeThreshold").getAsInt(); + int size = obj.get("size").getAsInt(); + + JsonArray entryArray = obj.get("entries").getAsJsonArray(); + int[] entries = new int[entryArray.size()]; + + for (int i = 0; i < entryArray.size(); i++) { + entries[i] = entryArray.get(i).getAsInt(); + } + + Int2IntCounterMap result = new Int2IntCounterMap(0, loadFactor, initialValue); + + Field resizeThresholdField = null; + Field entryField = null; + Field sizeField = null; + + try { + resizeThresholdField = Int2IntCounterMap.class.getDeclaredField("resizeThreshold"); + entryField = Int2IntCounterMap.class.getDeclaredField("entries"); + sizeField = Int2IntCounterMap.class.getDeclaredField("size"); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + + resizeThresholdField.setAccessible(true); + entryField.setAccessible(true); + sizeField.setAccessible(true); + + try { + resizeThresholdField.set(result, resizeThreshold); + entryField.set(result, entries); + sizeField.set(result, size); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + + return result; + } + } + + static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation() .enableComplexMapKeySerialization() - .registerTypeAdapter(RegionInfo.class, new JsonDeserializer(){ - @Override public Object deserialize(JsonElement json, Type typeOfT, - JsonDeserializationContext context) throws JsonParseException { - return null; + .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) + .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { + JsonObject obj = json.getAsJsonObject(); + + boolean split = obj.get("split").getAsBoolean(); + long regionId = obj.get("regionId").getAsLong(); + int replicaId = obj.get("replicaId").getAsInt(); + JsonObject tableName = obj.get("tableName").getAsJsonObject(); + JsonArray startKey = obj.get("startKey").getAsJsonArray(); + JsonArray endKey = obj.get("endKey").getAsJsonArray(); + + byte[] startKeyBytes = new byte[startKey.size()]; + byte[] endKeyBytes = new byte[endKey.size()]; + + for (int i = 0; i < startKey.size(); i++) { + startKeyBytes[i] = startKey.get(i).getAsByte(); } + for (int i = 0; i < endKey.size(); i++) { + endKeyBytes[i] = endKey.get(i).getAsByte(); + } + + TableName tb = TableName.valueOf( + tableName.get("namespaceAsString").getAsString(), + tableName.get("qualifierAsString").getAsString() + ); + + RegionInfo result = + RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) + .setReplicaId(replicaId).setStartKey(startKeyBytes).setEndKey(endKeyBytes).build(); + return result; }) .addDeserializationExclusionStrategy(new ExclusionStrategy() { @Override public boolean shouldSkipField(FieldAttributes f) { From aad1e1bf0e2367596f733911f12c62d79a2546ae Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 20 Nov 2024 11:01:26 -0500 Subject: [PATCH 075/126] Stash partial balancer rework --- .../master/balancer/BalancerClusterState.java | 4 + .../HubSpotCellBasedCandidateGenerator.java | 705 ++++++++++++++---- .../balancer/HubSpotCellCostFunction.java | 39 +- .../master/balancer/HubSpotScratchFile.java | 79 ++ 4 files changed, 676 insertions(+), 151 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index de1cb5793017..86974ae39ff8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -123,6 +123,10 @@ public String getRack(ServerName server) { } } + BalancerClusterState() { + this.rackManager = null; + } + BalancerClusterState(Map> clusterState, Map> loads, RegionLocationFinder regionFinder, RackManager rackManager) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 8bd0813d8500..62a02874aeda 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -17,24 +17,32 @@ */ package org.apache.hadoop.hbase.master.balancer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; +import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; @InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { + private static final boolean DEBUG_MAJOR = false; + private static final boolean DEBUG_MINOR = false; private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); @@ -45,53 +53,598 @@ } if (LOG.isTraceEnabled()) { - LOG.trace("Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions for tables {}", + LOG.trace( + "Running HubSpotCellBasedCandidateGenerator with {} servers and {} regions for tables {}", cluster.regionsPerServer.length, cluster.regions.length, cluster.tables); } + int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; + Arrays.stream(cluster.regions) + .flatMap(region -> HubSpotCellCostFunction.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT).stream()) + .forEach(cellOnRegion -> cellCounts[cellOnRegion]++); + + List> cellGroupSizesPerServer = + IntStream.range(0, cluster.regionsPerServer.length).mapToObj( + serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + + Pair cellOnServer = pickHeaviestCellOnServerToImprove(cellGroupSizesPerServer, cellCounts, cluster); + + // we finished the simple balance, now we have a lot of smaller leftovers to balance out + if (cellOnServer.getSecond() == -1) { + return giveAwaySomeRegionToImprove( + pickLightestCellOnServerToImprove(cellGroupSizesPerServer, cellCounts, cluster), + cellGroupSizesPerServer, + cluster + ); + + } + + return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); + } + + private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnServer, List> cellGroupSizesPerServer, BalancerClusterState cluster) { + + short cellToRemove = cellOnServer.getFirst(); + int serverToYieldCell = cellOnServer.getSecond(); + + if (serverToYieldCell == -1) { + if (LOG.isTraceEnabled()) { + LOG.trace("No server available to improve"); + } + return BalanceAction.NULL_ACTION; + } + + Map cellCountsOnServerToYieldCell = cellGroupSizesPerServer.get(serverToYieldCell); + Set cellsOnServerToYieldCell = cellCountsOnServerToYieldCell.keySet(); + + if (cluster.regionsPerServer[serverToYieldCell].length == 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("{} has no regions", serverToYieldCell); + } + return BalanceAction.NULL_ACTION; + } + + Set candidateSet = new HashSet<>(); + for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { + if (server == serverToYieldCell) { + continue; + } + + Map cellsOnServer = cellGroupSizesPerServer.get(server); + + // if that server is perfectly isolated, don't allow that to be broken even to fix another + if (cellsOnServer.keySet().size() == 1 && !cellsOnServer.containsKey(cellToRemove)) { + continue; + } + + int targetRegionsPerServer = Ints.checkedCast( + (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + double allowableImbalanceInRegions = 1.03; + + if (cluster.regionsPerServer[server].length >= Math.ceil(targetRegionsPerServer * allowableImbalanceInRegions)) { + continue; + } + + if (cellsOnServer.containsKey(cellToRemove)) { + candidateSet.add(server); + + Sets.SetView cellsInCommon = + Sets.intersection(cellsOnServerToYieldCell, cellsOnServer.keySet()); + + if (cellsInCommon.size() > 1) { + short commonCellToSwap = + cellsInCommon.stream().filter(cell -> cell != cellToRemove).findAny().get(); + SwapRegionsAction action = + swap(server, cellToRemove, serverToYieldCell, commonCellToSwap, cluster); + if (LOG.isDebugEnabled() || DEBUG_MINOR) { + int sourceOldTotal = cellsOnServerToYieldCell.size(); + int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnServer.size(); + int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(commonCellToSwap) == 1 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + commonCellToSwap, + cellCountsOnServerToYieldCell.get(commonCellToSwap), + commonCellToSwap, + cellsOnServer.get(commonCellToSwap), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + commonCellToSwap, + cellCountsOnServerToYieldCell.get(commonCellToSwap), + commonCellToSwap, + cellsOnServer.get(commonCellToSwap), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + } + return action; + } + } + } + + List candidates = new ArrayList<>(candidateSet); + + if (candidates.isEmpty()) { + // this means we've reached the end of the road for this particular cell + return BalanceAction.NULL_ACTION; + } + + candidates.sort(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cellToRemove))); + + int serverToSend = candidates.get(candidates.size() - 1); + int numInstancesOfCellOnServerToSend = cellGroupSizesPerServer.get(serverToSend).get(cellToRemove); + + double reservoirRandom = ThreadLocalRandom.current().nextDouble(); + for (int i = candidates.size() - 2; i >= 0; i--) { + int nextCandidate = candidates.get(i); + int numInstancesOfCellOnNextCandidate = cellGroupSizesPerServer.get(nextCandidate).get(cellToRemove); + + if (numInstancesOfCellOnNextCandidate < numInstancesOfCellOnServerToSend) { + break; + } + + double nextRandom = ThreadLocalRandom.current().nextDouble(); + if (nextRandom > reservoirRandom) { + reservoirRandom = nextRandom; + serverToSend = nextCandidate; + numInstancesOfCellOnServerToSend = numInstancesOfCellOnNextCandidate; + } + } + + Multimap cellsByRegion = + computeCellsByRegion(cluster.regionsPerServer[serverToYieldCell], cluster.regions); + + MoveRegionAction action = (MoveRegionAction) getAction( + serverToYieldCell, + pickRegionForCell(cellsByRegion, cellToRemove), + serverToSend, + -1 + ); + + Map cellsOnTarget = cellGroupSizesPerServer.get(serverToSend); + + if (LOG.isDebugEnabled() || DEBUG_MINOR) { + int sourceOldTotal = cellsOnServerToYieldCell.size(); + int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnTarget.size(); + int targetNewTotal = cellsOnTarget.size() + (cellsOnTarget.get(cellToRemove) == 0 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Moving s%d.r%d c[%d / %d] to s%d. SOURCE is %d -> %d, TARGET is %d -> %d. Change is %s\n", + action.getFromServer(), + action.getRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + action.getToServer(), + sourceOldTotal, + sourceNewTotal, + targetOldTotal, + targetNewTotal, + descrOfQuality + ); + LOG.debug("Moving s{}.r{} c[{} / {}] to s{}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", + action.getFromServer(), + action.getRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + action.getToServer(), + sourceOldTotal, + sourceNewTotal, + targetOldTotal, + targetNewTotal, + descrOfQuality + ); + } + + return action; + } + + private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, + List> cellGroupSizesPerServer, BalancerClusterState cluster) { + + short cellToImprove = cellOnServer.getFirst(); + int serverToImprove = cellOnServer.getSecond(); + + if (serverToImprove == -1) { + if (LOG.isTraceEnabled()) { + LOG.trace("No server available to improve"); + } + return BalanceAction.NULL_ACTION; + } + + Map cellCountsOnServerToImprove = cellGroupSizesPerServer.get(serverToImprove); + Set cellsOnServerToImprove = cellCountsOnServerToImprove.keySet(); + + if (serverToImprove < 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("No server with cells found"); + } + return BalanceAction.NULL_ACTION; + } + + if (cluster.regionsPerServer[serverToImprove].length == 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("{} has no regions", serverToImprove); + } + return BalanceAction.NULL_ACTION; + } + + Set candidateSet = new HashSet<>(); + Optional shortCircuit = Optional.empty(); + for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { + if (server == serverToImprove) { + continue; + } + + Map cellsOnServer = cellGroupSizesPerServer.get(server); + + // if that server is perfectly isolated, don't allow that to be broken even to fix another + if (cellsOnServer.keySet().size() == 1) { + continue; + } + + if (cellsOnServer.containsKey(cellToImprove)) { + candidateSet.add(server); + + Sets.SetView cellsInCommon = + Sets.intersection(cellsOnServerToImprove, cellsOnServer.keySet()); + + if (cellsInCommon.size() > 1) { + short commonCellToSwap = + cellsInCommon.stream().filter(cell -> cell != cellToImprove).findAny().get(); + SwapRegionsAction action = + swap(serverToImprove, cellToImprove, server, commonCellToSwap, cluster); + if (LOG.isDebugEnabled() || DEBUG_MAJOR) { + int sourceOldTotal = cellsOnServer.size(); + int sourceNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToImprove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnServerToImprove.size(); + int targetNewTotal = cellCountsOnServerToImprove.size() - (cellCountsOnServerToImprove.get(commonCellToSwap) == 1 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + commonCellToSwap, + cellCountsOnServerToImprove.get(commonCellToSwap), + cellToImprove, + cellCountsOnServerToImprove.get(cellToImprove), + cellToImprove, + cellsOnServer.get(cellToImprove), + commonCellToSwap, + cellsOnServer.get(commonCellToSwap), + descrOfQuality + ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + commonCellToSwap, + cellCountsOnServerToImprove.get(commonCellToSwap), + cellToImprove, + cellCountsOnServerToImprove.get(cellToImprove), + cellToImprove, + cellsOnServer.get(cellToImprove), + commonCellToSwap, + cellsOnServer.get(commonCellToSwap), + descrOfQuality + ); + } + return action; + } + } + } + + List candidates = new ArrayList<>(candidateSet); + + if (candidates.isEmpty()) { + // this means we've reached the end of the road for this particular cell + return BalanceAction.NULL_ACTION; + } + + int serverToSwap = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); + short cellToOffer = cellsOnServerToImprove.stream() + .filter(cell -> cell != cellToImprove) + .collect(Collectors.toList()) + .get(ThreadLocalRandom.current().nextInt(cellsOnServerToImprove.size() - 1)); + + Map cellsOnServer = cellGroupSizesPerServer.get(serverToSwap); + + SwapRegionsAction action = + swap(serverToImprove, cellToImprove, serverToSwap, cellToOffer, cluster); + + if (LOG.isDebugEnabled() || DEBUG_MAJOR) { + int sourceOldTotal = cellsOnServer.size(); + int sourceNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToImprove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnServerToImprove.size(); + int targetNewTotal = cellCountsOnServerToImprove.size() - (cellCountsOnServerToImprove.get(cellToOffer) == 1 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToOffer, + cellCountsOnServerToImprove.get(cellToOffer), + cellToImprove, + cellCountsOnServerToImprove.get(cellToImprove), + cellToImprove, + cellsOnServer.get(cellToImprove), + cellToOffer, + cellsOnServer.get(cellToOffer), + descrOfQuality + ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToOffer, + cellCountsOnServerToImprove.get(cellToOffer), + cellToImprove, + cellCountsOnServerToImprove.get(cellToImprove), + cellToImprove, + cellsOnServer.get(cellToImprove), + cellToOffer, + cellsOnServer.get(cellToOffer), + descrOfQuality + ); + } + + return action; + } + + private SwapRegionsAction swap( + int receivingServer, + short cellToGiveToReceivingServer, + int offeringServer, + short cellToOfferFromReceivingServerToOrigin, + BalancerClusterState cluster + ) { + Multimap cellsByRegionForReceivingServer = + computeCellsByRegion(cluster.regionsPerServer[receivingServer], cluster.regions); + Multimap cellsByRegionForOfferingServer = + computeCellsByRegion(cluster.regionsPerServer[offeringServer], cluster.regions); + + return (SwapRegionsAction) getAction( + offeringServer, pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), + receivingServer, pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin) + ); + } + + private int pickRegionForCell(Multimap cellsByRegionOnServer, short cellToMove) { + return cellsByRegionOnServer.keySet().stream() + .filter(region -> cellsByRegionOnServer.get(region).contains(cellToMove)) + .min(Comparator.comparingInt(region -> cellsByRegionOnServer.get(region).size())) + .orElseGet(() -> -1); + } + + static List computeCellsPerRs(BalancerClusterState cluster) { + List> cellGroupSizesPerServer = + IntStream.range(0, cluster.regionsPerServer.length).mapToObj( + serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); + } + + private Pair pickHeaviestCellOnServerToImprove( + List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; - int serverWithMostCells = -1; - int mostCellsPerServerSoFar = 0; + Pair mostFrequentCellOnServer = Pair.newPair((short) -1, -1); + + int targetCellsPerServer = Ints.checkedCast( + (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); + int highestCellCountSoFar = Integer.MIN_VALUE; double mostCellsReservoirRandom = -1; for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { int[] regionsForServer = regionsPerServer[serverIndex]; - int cellsOnServer = numCells(cluster, serverIndex, regionsForServer); + Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); + + if (cellsOnServer.keySet().size() <= targetCellsPerServer) { + continue; + } + + Optional> mostFrequentCellMaybe = + cellsOnServer.entrySet().stream().max(Map.Entry.comparingByValue()); + + if (!mostFrequentCellMaybe.isPresent()) { + continue; + } + + short mostFrequentCell = mostFrequentCellMaybe.get().getKey(); + int mostFrequentCellCount = mostFrequentCellMaybe.get().getValue(); + + // if we've collected all of the regions for a given cell on one server, we can't improve + if (mostFrequentCellCount == cellCounts[mostFrequentCell]) { + continue; + } + + long numServersWithMostFrequentCellNotSaturated = + cellGroupSizesPerServer.stream().filter(cellMap -> cellMap.containsKey(mostFrequentCell)) + .filter(cellMap -> cellMap.keySet().size() > 1).count(); + // if we're down to only one server unsaturated with the most frequent cell, there are no good swaps + if (numServersWithMostFrequentCellNotSaturated == 1) { + continue; + } if (LOG.isTraceEnabled()) { - LOG.trace("Server {} has {} regions, which have {} cells", - serverIndex, - Arrays.stream(regionsForServer).boxed().sorted().collect( - Collectors.toList()), cellsOnServer); + LOG.trace("Server {} has {} regions, which have {} cells", serverIndex, + Arrays.stream(regionsForServer).boxed().sorted().collect(Collectors.toList()), + cellsOnServer.size()); } // we don't know how many servers have the same cell count, so use a simplified online // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) - if (cellsOnServer > mostCellsPerServerSoFar) { - serverWithMostCells = serverIndex; - mostCellsPerServerSoFar = cellsOnServer; + if (mostFrequentCellCount > highestCellCountSoFar) { + mostFrequentCellOnServer = Pair.newPair(mostFrequentCell, serverIndex); + highestCellCountSoFar = mostFrequentCellCount; mostCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (cellsOnServer == mostCellsPerServerSoFar) { + } else if (mostFrequentCellCount == highestCellCountSoFar) { double maxCellRandom = ThreadLocalRandom.current().nextDouble(); if (maxCellRandom > mostCellsReservoirRandom) { - serverWithMostCells = serverIndex; + mostFrequentCellOnServer = Pair.newPair(mostFrequentCell, serverIndex); mostCellsReservoirRandom = maxCellRandom; } } } - return maybeMoveRegion(cluster, serverWithMostCells); + return mostFrequentCellOnServer; } - private int numCells(BalancerClusterState cluster, int serverIndex, int[] regionsForServer) { - boolean[] cellsPresent = new boolean[HubSpotCellCostFunction.MAX_CELL_COUNT]; + private Pair pickLightestCellOnServerToImprove( + List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { + cluster.sortServersByRegionCount(); + int[][] regionsPerServer = cluster.regionsPerServer; + + Pair leastFrequentCellOnServer = Pair.newPair((short) -1, -1); + + int targetCellsPerServer = Ints.checkedCast( + (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)) + 1; + int targetRegionsPerServer = Ints.checkedCast( + (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + double allowableImbalanceInRegions = 1.03; + + int lowestCellCountSoFar = Integer.MAX_VALUE; + double leastCellsReservoirRandom = -1; + + for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { + Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); + + if (cellsOnServer.keySet().size() <= targetCellsPerServer) { + continue; + } + + Optional> leastFrequentCellMaybe = + cellsOnServer.entrySet().stream().min(Map.Entry.comparingByValue()); + + if (!leastFrequentCellMaybe.isPresent()) { + continue; + } + + short leastFrequentCell = leastFrequentCellMaybe.get().getKey(); + int leastFrequentCellCount = leastFrequentCellMaybe.get().getValue(); + + long numServersWithLeastFrequentCellNotSaturated = + IntStream.range(0, cluster.numServers) + .filter(server -> { + Map cellCountsForServer = cellGroupSizesPerServer.get(server); + + if (!cellCountsForServer.containsKey(leastFrequentCell)) { + return false; + } + + return cellCountsForServer.keySet().size() != 1 || regionsPerServer[server].length + <= Math.ceil(targetRegionsPerServer * allowableImbalanceInRegions); + }) + .count(); + + // if we're down to only one server unsaturated with the least frequent cell, there are no good swaps + if (numServersWithLeastFrequentCellNotSaturated == 1) { + continue; + } + + // we don't know how many servers have the same cell count, so use a simplified online + // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) + if (leastFrequentCellCount < lowestCellCountSoFar) { + leastFrequentCellOnServer = Pair.newPair(leastFrequentCell, serverIndex); + lowestCellCountSoFar = leastFrequentCellCount; + leastCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); + } else if (leastFrequentCellCount == lowestCellCountSoFar) { + double maxCellRandom = ThreadLocalRandom.current().nextDouble(); + if (maxCellRandom > leastCellsReservoirRandom) { + leastFrequentCellOnServer = Pair.newPair(leastFrequentCell, serverIndex); + leastCellsReservoirRandom = maxCellRandom; + } + } + } + + return leastFrequentCellOnServer; + } + + private static Map computeCellGroupSizes(BalancerClusterState cluster, + int serverIndex, int[] regionsForServer) { + Map cellGroupSizes = new HashMap<>(); + int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int regionIndex : regionsForServer) { if (regionIndex < 0 || regionIndex > cluster.regions.length) { if (LOG.isTraceEnabled()) { - LOG.trace("Skipping region {} because it's <0 or >{}", regionIndex, regionsForServer.length); + LOG.trace("Skipping region {} because it's <0 or >{}", regionIndex, + regionsForServer.length); } continue; } @@ -100,7 +653,8 @@ private int numCells(BalancerClusterState cluster, int serverIndex, int[] region if (!region.getTable().getNamespaceAsString().equals("default")) { if (LOG.isTraceEnabled()) { - LOG.trace("Skipping region {} because it's not in the default namespace", region.getTable().getNameWithNamespaceInclAsString()); + LOG.trace("Skipping region {} because it's not in the default namespace", + region.getTable().getNameWithNamespaceInclAsString()); } continue; } @@ -128,122 +682,21 @@ private int numCells(BalancerClusterState cluster, int serverIndex, int[] region } for (short i = startCellId; i < endCellId; i++) { - cellsPresent[i] = true; + cellCounts[i]++; } if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { - cellsPresent[endCellId] = true; - } - } - - int count = 0; - for (boolean hasCell : cellsPresent) { - if (hasCell) { - count++; - } - } - - return count; - } - - BalanceAction maybeMoveRegion(BalancerClusterState cluster, int serverWithMostCells) { - if (serverWithMostCells < 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("No server with cells found"); + cellCounts[endCellId]++; } - return BalanceAction.NULL_ACTION; } - if (cluster.regionsPerServer[serverWithMostCells].length == 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("{} has no regions", serverWithMostCells); - } - return BalanceAction.NULL_ACTION; - } - - Multimap cellsByRegionOnSource = - computeCellsByRegion(cluster.regionsPerServer[serverWithMostCells], cluster.regions); - Map countOfRegionsForCellOnSource = new HashMap<>(); - cellsByRegionOnSource.forEach( - (region, cell) -> countOfRegionsForCellOnSource.computeIfAbsent(cell, - ignored -> new AtomicInteger()).incrementAndGet()); - - int regionWithFewestInstancesOfCellsPresent = - cellsByRegionOnSource.keySet().stream().min(Comparator.comparing(region -> { - return cellsByRegionOnSource.get(region).stream().mapToInt(cell -> { - return countOfRegionsForCellOnSource.get(cell).get(); - }).max().orElseGet(() -> 0); - })).orElseGet(() -> -1); - - int targetServer = computeBestServerToReceiveRegion(cluster, serverWithMostCells, - regionWithFewestInstancesOfCellsPresent); - - if (LOG.isDebugEnabled()) { - Multimap cellsByRegionOnTarget = - computeCellsByRegion(cluster.regionsPerServer[targetServer], cluster.regions); - - Set currentCellsOnSource = new HashSet<>(cellsByRegionOnSource.values()); - Set currentCellsOnTarget = new HashSet<>(cellsByRegionOnTarget.values()); - - Set afterMoveCellsOnSource = cellsByRegionOnSource.keySet().stream() - .filter(region -> region != regionWithFewestInstancesOfCellsPresent) - .flatMap(region -> cellsByRegionOnSource.get(region).stream()) - .collect(Collectors.toSet()); - Set afterMoveCellsOnTarget = new HashSet<>(currentCellsOnTarget); - afterMoveCellsOnTarget.addAll( - cellsByRegionOnSource.get(regionWithFewestInstancesOfCellsPresent)); - - boolean sourceImproves = afterMoveCellsOnSource.size() < currentCellsOnSource.size(); - boolean targetStaysSame = afterMoveCellsOnTarget.size() == currentCellsOnTarget.size(); - - LOG.debug("Moving s{}.r{} to {}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", - serverWithMostCells, - regionWithFewestInstancesOfCellsPresent, - targetServer, - currentCellsOnSource.size(), - afterMoveCellsOnSource.size(), - currentCellsOnTarget.size(), - afterMoveCellsOnTarget.size(), - (sourceImproves && targetStaysSame) ? "GOOD" : ((sourceImproves) ? "NEUTRAL" : "BAD") - ); - } - - return getAction(serverWithMostCells, regionWithFewestInstancesOfCellsPresent, targetServer, -1); - } - - private int computeBestServerToReceiveRegion(BalancerClusterState cluster, int currentServer, - int region) { - // This is the lightest loaded (by count), but we want to keep cell collocation to a minimum - int target = cluster.serverIndicesSortedByRegionCount[0]; - - Set cellsOnTransferRegion = - new HashSet<>(computeCellsByRegion(new int[] { region }, cluster.regions).get(region)); - - // so, we'll make a best effort to see if we can find a reasonably loaded server that already - // has the cells for this region - for (int i = 0; i < cluster.serverIndicesSortedByRegionCount.length; i++) { - int server = cluster.serverIndicesSortedByRegionCount[i]; - - if (server == currentServer) { - continue; - } - - int[] regionsOnCandidate = cluster.regionsPerServer[server]; - if (regionsOnCandidate.length > 2 * cluster.regionsPerServer[currentServer].length) { - // don't try to transfer a region to a server that already has more than 2x ours - break; - } - - Multimap possibleTargetCellsByRegion = - computeCellsByRegion(regionsOnCandidate, cluster.regions); - // if the candidate server has all the cells we need, this transfer can only improve isolation - if (new HashSet<>(possibleTargetCellsByRegion.values()).containsAll(cellsOnTransferRegion)) { - target = server; - break; + for (short c = 0; c < cellCounts.length; c++) { + if (cellCounts[c] > 0) { + cellGroupSizes.put(c, cellCounts[c]); } } - return target; + return cellGroupSizes; } private Multimap computeCellsByRegion(int[] regionIndices, RegionInfo[] regions) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index e39a9d398404..27c95460894b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -200,7 +200,7 @@ static class Int2IntCounterMapAdapter implements JsonSerializer highestLocalityServerMaybe = - Optional.ofNullable(regionLocations).filter(locations -> locations.length > 0) - .map(locations -> locations[0]).map(serverIndex -> this.servers[serverIndex]); - int assignedServers = Optional.ofNullable(regionLocations) - .map(locations -> locations.length).orElseGet(() -> 0); + Optional.ofNullable(location).filter(serverLocation -> serverLocation >= 0) + .map(serverIndex -> this.servers[serverIndex]); - if (assignedServers > 0) { + if (location > 0) { numAssigned++; } else { numUnassigned++; @@ -373,8 +370,7 @@ private String snapshotState() { .append(Bytes.toHex(region.getStartKey())).append(", ") .append(Bytes.toHex(region.getEndKey())).append(") ").append(cellsInRegion).append(" [") .append(regionSizeMb).append(" mb] -> ") - .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")) - .append("(with ").append(assignedServers).append(" total candidates)"); + .append(highestLocalityServerMaybe.map(ServerName::getServerName).orElseGet(() -> "N/A")); } stateString.append("\n]\n\n\tAssigned regions: ").append(numAssigned) @@ -400,7 +396,7 @@ static int calculateCurrentCellCost( int numServers, int bestCaseMaxCellsPerServer, RegionInfo[] regions, - int[][] regionLocations, + int[] regionLocations, boolean[][] serverHasCell, Function getRegionSizeMbFunc ) { @@ -429,25 +425,18 @@ static int calculateCurrentCellCost( throw new IllegalStateException("No region available at index " + i); } - if (regionLocations[i] == null) { - throw new IllegalStateException( - "No server list available for region " + regions[i].getShortNameToLog()); - } - - if (regionLocations[i].length == 0) { + if (regionLocations[i] == -1) { int regionSizeMb = getRegionSizeMbFunc.apply(i); if (regionSizeMb == 0 && LOG.isTraceEnabled()) { LOG.trace("{} ({} mb): no servers available, this IS an empty region", regions[i].getShortNameToLog(), regionSizeMb); } else { - LOG.warn("{} ({} mb): no servers available, this IS NOT an empty region", - regions[i].getShortNameToLog(), regionSizeMb); + throw new IllegalStateException( + "No server list available for region " + regions[i].getShortNameToLog()); } - - continue; } - setCellsForServer(serverHasCell[regionLocations[i][0]], regions[i].getStartKey(), + setCellsForServer(serverHasCell[regionLocations[i]], regions[i].getStartKey(), regions[i].getEndKey(), numCells); } @@ -527,7 +516,7 @@ static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { return Shorts.checkedCast(cellsInRegions.size()); } - private static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { + static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { return range(padToTwoBytes(rawStart, (byte) 0), padToTwoBytes(rawStop, (byte) -1), numCells); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java new file mode 100644 index 000000000000..b23de8979a4b --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java @@ -0,0 +1,79 @@ +package org.apache.hadoop.hbase.master.balancer; + +import java.io.IOException; +import java.net.URL; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import com.google.common.math.Quantiles; +import org.apache.hadoop.conf.Configuration; +import org.apache.hbase.thirdparty.com.google.common.math.Stats; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Charsets; +import org.apache.hbase.thirdparty.com.google.common.io.Resources; + +public class HubSpotScratchFile { + private static final Logger LOG = LoggerFactory.getLogger(HubSpotScratchFile.class); + + public static void main(String[] args) throws IOException { + String file = Resources.readLines(new URL("file:///Users/eszabowexler/Downloads/cluster.json"), Charsets.UTF_8).stream() + .collect(Collectors.joining("\n")); + BalancerClusterState state = HubSpotCellCostFunction.OBJECT_MAPPER.fromJson(file, BalancerClusterState.class); + + + HubSpotCellCostFunction func = + new HubSpotCellCostFunction(new Configuration()); + HubSpotCellBasedCandidateGenerator generator = new HubSpotCellBasedCandidateGenerator(); + + func.prepare(state); + double cost = func.cost(); + Set movedRegions = new HashSet<>(); + Set fromServers = new HashSet<>(); + Set toServers = new HashSet<>(); + Set repeatMoveRegions = new HashSet<>(); + + double lastCost = cost; + int printFrequency = 500; + + for (int step = 0; step < 200_000; step++) { + if (step % printFrequency == 0) { + double costDelta = cost - lastCost; + lastCost = cost; + double costPerStep = costDelta / printFrequency; + + List size = HubSpotCellBasedCandidateGenerator.computeCellsPerRs(state); + Map quantiles = + Quantiles.scale(100).indexes(10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(size); + + System.out.printf("Step %d --> %.2f - %d regions moved (%d more than once), %d sources, %d targets. Moving %.2f per step, cumulative %.2f drop\t\t\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t\n", + step, cost, movedRegions.size(), repeatMoveRegions.size(), fromServers.size(), toServers.size(), costPerStep, costDelta, + quantiles.get(10), quantiles.get(20),quantiles.get(30),quantiles.get(40),quantiles.get(50),quantiles.get(60),quantiles.get(70),quantiles.get(80),quantiles.get(90),quantiles.get(100)); + } + BalanceAction action = generator.generate(state); + if (action instanceof SwapRegionsAction) { + SwapRegionsAction swapRegionsAction = (SwapRegionsAction) action; + + if (movedRegions.contains(swapRegionsAction.getFromRegion())) { + repeatMoveRegions.add(swapRegionsAction.getFromServer()); + } + if (movedRegions.contains(swapRegionsAction.getToRegion())) { + repeatMoveRegions.add(swapRegionsAction.getToRegion()); + } + + movedRegions.add(swapRegionsAction.getFromRegion()); + movedRegions.add(swapRegionsAction.getToRegion()); + fromServers.add(swapRegionsAction.getFromServer()); + toServers.add(swapRegionsAction.getToServer()); + } + + state.doAction(action); + func.postAction(action); + cost = func.cost(); + } + + LOG.info("{}", state); + } +} From 5a1dc7971f3bc0a7752a79c646ee61b6f5794376 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 20 Nov 2024 14:51:40 -0500 Subject: [PATCH 076/126] Stash2 -- gets to a balance of 1-6 cells/RS --- .../HubSpotCellBasedCandidateGenerator.java | 489 +++++++++++++----- 1 file changed, 366 insertions(+), 123 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 62a02874aeda..b6f9b1fd8e4b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -41,6 +41,7 @@ import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; @InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { + private static final int NO_SERVER = -1; private static final boolean DEBUG_MAJOR = false; private static final boolean DEBUG_MINOR = false; @@ -62,50 +63,117 @@ Arrays.stream(cluster.regions) .flatMap(region -> HubSpotCellCostFunction.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT).stream()) .forEach(cellOnRegion -> cellCounts[cellOnRegion]++); + double[] cellPercents = new double[HubSpotCellCostFunction.MAX_CELL_COUNT]; + for (int i = 0; i < cellCounts.length; i++) { + cellPercents[i] = (double) cellCounts[i] / cluster.numRegions; + } List> cellGroupSizesPerServer = IntStream.range(0, cluster.regionsPerServer.length).mapToObj( serverIndex -> computeCellGroupSizes(cluster, serverIndex, cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); - Pair cellOnServer = pickHeaviestCellOnServerToImprove(cellGroupSizesPerServer, cellCounts, cluster); + return generateAction(cluster, cellCounts, cellGroupSizesPerServer); + } - // we finished the simple balance, now we have a lot of smaller leftovers to balance out - if (cellOnServer.getSecond() == -1) { - return giveAwaySomeRegionToImprove( - pickLightestCellOnServerToImprove(cellGroupSizesPerServer, cellCounts, cluster), - cellGroupSizesPerServer, - cluster - ); + private BalanceAction generateAction( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer + ) { + int targetRegionsPerServer = Ints.checkedCast( + (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + List underloadedServers = IntStream.range(0, cluster.numServers) + .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer).boxed() + .distinct() + .collect(Collectors.toList()); + List overloadedServers = IntStream.range(0, cluster.numServers) + .filter(server -> cluster.regionsPerServer[server].length > targetRegionsPerServer).boxed() + .distinct() + .collect(Collectors.toList()); + + // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within plus/minus 1 of the target + if (!underloadedServers.isEmpty() && !overloadedServers.isEmpty()) { + return moveRegionFromOverloadedToUnderloaded(overloadedServers, underloadedServers, cellGroupSizesPerServer, cluster); } - return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); - } + // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps + Pair cellOnServer = pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); - private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnServer, List> cellGroupSizesPerServer, BalancerClusterState cluster) { + if (cellOnServer.getSecond() != NO_SERVER) { + return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); + } - short cellToRemove = cellOnServer.getFirst(); - int serverToYieldCell = cellOnServer.getSecond(); + // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces + // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can either swap + // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that subsequent rounds + // will use step (1) to repair the imbalance. + cellOnServer = + pickLeastFrequentCellOnMostLoadedServer(cellGroupSizesPerServer, cellCounts, cluster); - if (serverToYieldCell == -1) { - if (LOG.isTraceEnabled()) { - LOG.trace("No server available to improve"); - } + if (cellOnServer.getSecond() == NO_SERVER) { return BalanceAction.NULL_ACTION; } - Map cellCountsOnServerToYieldCell = cellGroupSizesPerServer.get(serverToYieldCell); - Set cellsOnServerToYieldCell = cellCountsOnServerToYieldCell.keySet(); + return giveAwaySomeRegionToImprove( + cellOnServer, + cellGroupSizesPerServer, + cellCounts, + cluster + ); + } - if (cluster.regionsPerServer[serverToYieldCell].length == 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("{} has no regions", serverToYieldCell); + private BalanceAction moveRegionFromOverloadedToUnderloaded( + List overloadedServers, + List underloadedServers, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + int underloadedServer = + underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); + int overloadedServer = overloadedServers.get(ThreadLocalRandom.current().nextInt(overloadedServers.size())); + short cellToMove = pickLeastFrequentCell(cellGroupSizesPerServer.get(overloadedServer)); + Multimap cellsByRegionForOverloadedServer = + computeCellsByRegion(cluster.regionsPerServer[overloadedServer], cluster.regions); + + for (int overloadedServerCandidate : overloadedServers) { + short lightest = pickLeastFrequentCell(cellGroupSizesPerServer.get(overloadedServerCandidate)); + if (cellGroupSizesPerServer.get(overloadedServerCandidate).get(lightest) == 1) { + overloadedServer = overloadedServerCandidate; + cellToMove = lightest; + cellsByRegionForOverloadedServer = computeCellsByRegion(cluster.regionsPerServer[overloadedServer], cluster.regions); + break; } - return BalanceAction.NULL_ACTION; } - Set candidateSet = new HashSet<>(); + return getAction( + overloadedServer, + pickRegionForCell(cellsByRegionForOverloadedServer, cellToMove), underloadedServer, + -1 + ); + } + + private BalanceAction giveAwaySomeRegionToImprove( + Pair cellOnServer, + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { + + short cellToRemove = cellOnServer.getFirst(); + int serverToYieldCell = cellOnServer.getSecond(); + + Map cellCountsOnServerToYieldCell = cellGroupSizesPerServer.get(serverToYieldCell); + Set cellsOnServerToYieldCell = cellCountsOnServerToYieldCell.keySet(); + + int targetRegionsPerServer = Ints.checkedCast( + (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + double allowableImbalanceInRegions = 1.05; + + Set partialCandidatesWithInstanceOfOurCell = new HashSet<>(); + Set fullCandidatesWithInstanceOfOurCell = new HashSet<>(); + List candidatesWithFewerTotalCells = new ArrayList<>(); for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { if (server == serverToYieldCell) { continue; @@ -113,21 +181,32 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer Map cellsOnServer = cellGroupSizesPerServer.get(server); + Set cellsOnServerAndOthers = + cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) + .collect(Collectors.toSet()); + + double maxAllowedRegionCountPerServer = allowableImbalanceInRegions * targetRegionsPerServer; + if (cellsOnServer.keySet().size() < cellCountsOnServerToYieldCell.keySet().size() && + cluster.regionsPerServer[server].length <= Math.ceil(maxAllowedRegionCountPerServer)) { + candidatesWithFewerTotalCells.add(server); + } + // if that server is perfectly isolated, don't allow that to be broken even to fix another if (cellsOnServer.keySet().size() == 1 && !cellsOnServer.containsKey(cellToRemove)) { continue; } - int targetRegionsPerServer = Ints.checkedCast( - (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - double allowableImbalanceInRegions = 1.03; - - if (cluster.regionsPerServer[server].length >= Math.ceil(targetRegionsPerServer * allowableImbalanceInRegions)) { + if (cluster.regionsPerServer[server].length >= Math.ceil(maxAllowedRegionCountPerServer)) { continue; } if (cellsOnServer.containsKey(cellToRemove)) { - candidateSet.add(server); + if (cellsOnServer.keySet().size() == 1 + || cellsOnServerAndOthers.size() == 1) { + fullCandidatesWithInstanceOfOurCell.add(server); + } else { + partialCandidatesWithInstanceOfOurCell.add(server); + } Sets.SetView cellsInCommon = Sets.intersection(cellsOnServerToYieldCell, cellsOnServer.keySet()); @@ -193,33 +272,169 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer } } - List candidates = new ArrayList<>(candidateSet); + int serverToSend = -1; + List candidates = new ArrayList<>(partialCandidatesWithInstanceOfOurCell); - if (candidates.isEmpty()) { - // this means we've reached the end of the road for this particular cell - return BalanceAction.NULL_ACTION; - } - - candidates.sort(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cellToRemove))); - - int serverToSend = candidates.get(candidates.size() - 1); - int numInstancesOfCellOnServerToSend = cellGroupSizesPerServer.get(serverToSend).get(cellToRemove); - - double reservoirRandom = ThreadLocalRandom.current().nextDouble(); - for (int i = candidates.size() - 2; i >= 0; i--) { - int nextCandidate = candidates.get(i); - int numInstancesOfCellOnNextCandidate = cellGroupSizesPerServer.get(nextCandidate).get(cellToRemove); + Optional serverWeCanImprove = candidates.stream() + .filter( + server -> { + Map countsForServer = cellGroupSizesPerServer.get(server); + return countsForServer.keySet().stream() + .anyMatch(cell -> cell != cellToRemove && countsForServer.get(cell) == 1); + } + ).findFirst(); + + if (partialCandidatesWithInstanceOfOurCell.isEmpty() && fullCandidatesWithInstanceOfOurCell.isEmpty()) { + // nobody else has a copy of this cell that we can offload, we'll need to increase another server's load to reduce ours + serverToSend = candidatesWithFewerTotalCells.get(ThreadLocalRandom.current().nextInt(candidatesWithFewerTotalCells.size())); + } else if (serverWeCanImprove.isPresent()) { + int serverToSwap = serverWeCanImprove.get(); + Map cellsOnServer = cellGroupSizesPerServer.get(serverToSwap); + short cellToTakeFromSwap = cellsOnServer.keySet().stream() + .filter(cell -> cell != cellToRemove && cellsOnServer.get(cell) == 1) + .findFirst() + .get(); + + SwapRegionsAction action = + swap(serverToSwap, cellToRemove, serverToYieldCell, cellToTakeFromSwap, cluster); + + if (LOG.isDebugEnabled() || DEBUG_MINOR) { + int sourceOldTotal = cellsOnServerToYieldCell.size(); + int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnServer.size(); + int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToRemove) == 1 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + cellToTakeFromSwap, + cellCountsOnServerToYieldCell.get(cellToTakeFromSwap), + cellToTakeFromSwap, + cellsOnServer.get(cellToTakeFromSwap), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + cellToTakeFromSwap, + cellCountsOnServerToYieldCell.get(cellToTakeFromSwap), + cellToTakeFromSwap, + cellsOnServer.get(cellToTakeFromSwap), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + } + return action; + } else if (!fullCandidatesWithInstanceOfOurCell.isEmpty()) { + serverToSend = fullCandidatesWithInstanceOfOurCell.stream() + .findAny() + .get(); + } else { + candidates.sort(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cellToRemove))); + + serverToSend = candidates.get(Math.max(0, candidates.size() - 1)); + int numInstancesOfCellOnServerToSend = cellGroupSizesPerServer.get(serverToSend).get(cellToRemove); + + double reservoirRandom = ThreadLocalRandom.current().nextDouble(); + for (int i = candidates.size() - 2; i >= 0; i--) { + int nextCandidate = candidates.get(i); + int numInstancesOfCellOnNextCandidate = cellGroupSizesPerServer.get(nextCandidate).get(cellToRemove); + + if (numInstancesOfCellOnNextCandidate < numInstancesOfCellOnServerToSend) { + break; + } - if (numInstancesOfCellOnNextCandidate < numInstancesOfCellOnServerToSend) { - break; + double nextRandom = ThreadLocalRandom.current().nextDouble(); + if (nextRandom > reservoirRandom) { + reservoirRandom = nextRandom; + serverToSend = nextCandidate; + numInstancesOfCellOnServerToSend = numInstancesOfCellOnNextCandidate; + } } - double nextRandom = ThreadLocalRandom.current().nextDouble(); - if (nextRandom > reservoirRandom) { - reservoirRandom = nextRandom; - serverToSend = nextCandidate; - numInstancesOfCellOnServerToSend = numInstancesOfCellOnNextCandidate; - } + short cellToTake = pickRandomMinorityCell(cellGroupSizesPerServer.get(serverToSend)); + + Map cellsOnServer = cellGroupSizesPerServer.get(serverToSend); + SwapRegionsAction action = + swap(serverToSend, cellToRemove, serverToYieldCell, cellToTake, cluster); + + if (LOG.isDebugEnabled() || DEBUG_MINOR) { + int sourceOldTotal = cellsOnServerToYieldCell.size(); + int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); + int targetOldTotal = cellsOnServer.size(); + int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToRemove) == 1 ? 1 : 0); + + boolean sourceImproves = sourceNewTotal < sourceOldTotal; + boolean targetImproves = targetNewTotal < targetOldTotal; + boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; + boolean targetStaysSame = targetOldTotal == targetNewTotal; + + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + + System.out.printf( + "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + cellToTake, + cellCountsOnServerToYieldCell.get(cellToTake), + cellToTake, + cellsOnServer.get(cellToTake), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + cellToRemove, + cellCountsOnServerToYieldCell.get(cellToRemove), + cellToTake, + cellCountsOnServerToYieldCell.get(cellToTake), + cellToTake, + cellsOnServer.get(cellToTake), + cellToRemove, + cellsOnServer.get(cellToRemove), + descrOfQuality + ); + } + return action; } Multimap cellsByRegion = @@ -281,6 +496,63 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer return action; } + private short pickRandomMinorityCell(Map cellCounts) { + short preservedCell = pickMostFrequentCell(cellCounts); + List candidates = cellCounts.keySet().stream().filter(cell -> cell != preservedCell) + .collect(Collectors.toList()); + return candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); + } + + private short pickLeastFrequentCell( + Map cellCounts + ) { + short cellToPick = -1; + int lowestCountSoFar = Integer.MAX_VALUE; + double reservoirRandom = -1; + + for (short cell : cellCounts.keySet()) { + int count = cellCounts.get(cell); + if (count < lowestCountSoFar) { + cellToPick = cell; + lowestCountSoFar = count; + reservoirRandom = ThreadLocalRandom.current().nextDouble(); + } else if (count == lowestCountSoFar) { + double cellRandom = ThreadLocalRandom.current().nextDouble(); + if (cellRandom > reservoirRandom) { + cellToPick = cell; + reservoirRandom = cellRandom; + } + } + } + + return cellToPick; + } + + private short pickMostFrequentCell( + Map cellCounts + ) { + short cellToPick = -1; + int highestCountSoFar = Integer.MIN_VALUE; + double reservoirRandom = -1; + + for (short cell : cellCounts.keySet()) { + int count = cellCounts.get(cell); + if (count > highestCountSoFar) { + cellToPick = cell; + highestCountSoFar = count; + reservoirRandom = ThreadLocalRandom.current().nextDouble(); + } else if (count == highestCountSoFar) { + double cellRandom = ThreadLocalRandom.current().nextDouble(); + if (cellRandom > reservoirRandom) { + cellToPick = cell; + reservoirRandom = cellRandom; + } + } + } + + return cellToPick; + } + private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, List> cellGroupSizesPerServer, BalancerClusterState cluster) { @@ -312,7 +584,6 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, } Set candidateSet = new HashSet<>(); - Optional shortCircuit = Optional.empty(); for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { if (server == serverToImprove) { continue; @@ -498,7 +769,7 @@ static List computeCellsPerRs(BalancerClusterState cluster) { return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); } - private Pair pickHeaviestCellOnServerToImprove( + private Pair pickMostFrequentCellOnAnyUnsaturatedServer( List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; @@ -514,19 +785,39 @@ private Pair pickHeaviestCellOnServerToImprove( int[] regionsForServer = regionsPerServer[serverIndex]; Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); - if (cellsOnServer.keySet().size() <= targetCellsPerServer) { + Set cellsOnThisServerAndOthers = + cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) + .collect(Collectors.toSet()); + + if (cellsOnServer.keySet().size() <= targetCellsPerServer + // if we have a small cell where the entire cell is local, we MUST have at least 2 cells on this server to have + // an overall region balance, so allow us to go over the target by 1 cell + || cellsOnThisServerAndOthers.size() == 1) { continue; } - Optional> mostFrequentCellMaybe = - cellsOnServer.entrySet().stream().max(Map.Entry.comparingByValue()); + List> cellsByFrequencyDesc = + cellsOnServer.entrySet().stream().sorted(Map.Entry.comparingByValue()) + .collect(Collectors.toList()); - if (!mostFrequentCellMaybe.isPresent()) { + if (cellsByFrequencyDesc.isEmpty()) { continue; } - short mostFrequentCell = mostFrequentCellMaybe.get().getKey(); - int mostFrequentCellCount = mostFrequentCellMaybe.get().getValue(); + + int probe = cellsByFrequencyDesc.size() - 1; + short mostFrequentCellTemp = -1; + int mostFrequentCellCountTemp = -1; + + do { + Map.Entry entry = cellsByFrequencyDesc.get(probe); + mostFrequentCellTemp = entry.getKey(); + mostFrequentCellCountTemp = entry.getValue(); + probe--; + } while(mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); + + final short mostFrequentCell = mostFrequentCellTemp; + final int mostFrequentCellCount = mostFrequentCellCountTemp; // if we've collected all of the regions for a given cell on one server, we can't improve if (mostFrequentCellCount == cellCounts[mostFrequentCell]) { @@ -565,74 +856,26 @@ private Pair pickHeaviestCellOnServerToImprove( return mostFrequentCellOnServer; } - private Pair pickLightestCellOnServerToImprove( - List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { - cluster.sortServersByRegionCount(); - int[][] regionsPerServer = cluster.regionsPerServer; - - Pair leastFrequentCellOnServer = Pair.newPair((short) -1, -1); - + private Pair pickLeastFrequentCellOnMostLoadedServer( + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { int targetCellsPerServer = Ints.checkedCast( - (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)) + 1; - int targetRegionsPerServer = Ints.checkedCast( - (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - double allowableImbalanceInRegions = 1.03; - - int lowestCellCountSoFar = Integer.MAX_VALUE; - double leastCellsReservoirRandom = -1; - - for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { - Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); - - if (cellsOnServer.keySet().size() <= targetCellsPerServer) { - continue; - } - - Optional> leastFrequentCellMaybe = - cellsOnServer.entrySet().stream().min(Map.Entry.comparingByValue()); - - if (!leastFrequentCellMaybe.isPresent()) { - continue; - } - - short leastFrequentCell = leastFrequentCellMaybe.get().getKey(); - int leastFrequentCellCount = leastFrequentCellMaybe.get().getValue(); - - long numServersWithLeastFrequentCellNotSaturated = - IntStream.range(0, cluster.numServers) - .filter(server -> { - Map cellCountsForServer = cellGroupSizesPerServer.get(server); + (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); - if (!cellCountsForServer.containsKey(leastFrequentCell)) { - return false; - } + int highestLoadedServer = IntStream.range(0, cluster.numServers).boxed() + .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) + .collect(Collectors.toList()).get(cluster.numServers - 1); - return cellCountsForServer.keySet().size() != 1 || regionsPerServer[server].length - <= Math.ceil(targetRegionsPerServer * allowableImbalanceInRegions); - }) - .count(); + Map cellCountsForHighestLoadedServer = cellGroupSizesPerServer.get(highestLoadedServer); + int numCellsOnHighestLoadedServer = cellCountsForHighestLoadedServer.keySet().size(); - // if we're down to only one server unsaturated with the least frequent cell, there are no good swaps - if (numServersWithLeastFrequentCellNotSaturated == 1) { - continue; - } - - // we don't know how many servers have the same cell count, so use a simplified online - // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) - if (leastFrequentCellCount < lowestCellCountSoFar) { - leastFrequentCellOnServer = Pair.newPair(leastFrequentCell, serverIndex); - lowestCellCountSoFar = leastFrequentCellCount; - leastCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (leastFrequentCellCount == lowestCellCountSoFar) { - double maxCellRandom = ThreadLocalRandom.current().nextDouble(); - if (maxCellRandom > leastCellsReservoirRandom) { - leastFrequentCellOnServer = Pair.newPair(leastFrequentCell, serverIndex); - leastCellsReservoirRandom = maxCellRandom; - } - } + if (numCellsOnHighestLoadedServer <= targetCellsPerServer + 1) { + return Pair.newPair((short) -1, -1); } - return leastFrequentCellOnServer; + return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), highestLoadedServer); } private static Map computeCellGroupSizes(BalancerClusterState cluster, From 9199cd8000427d9508e39281645aff204f2c7b14 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 21 Nov 2024 08:59:47 -0500 Subject: [PATCH 077/126] First cleanup --- .../master/balancer/BalancerClusterState.java | 4 - .../HubSpotCellBasedCandidateGenerator.java | 681 ++++++++---------- .../master/balancer/HubSpotScratchFile.java | 79 -- 3 files changed, 284 insertions(+), 480 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 86974ae39ff8..de1cb5793017 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -123,10 +123,6 @@ public String getRack(ServerName server) { } } - BalancerClusterState() { - this.rackManager = null; - } - BalancerClusterState(Map> clusterState, Map> loads, RegionLocationFinder regionFinder, RackManager rackManager) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index b6f9b1fd8e4b..1e79b14f3da4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -42,6 +42,7 @@ @InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { private static final int NO_SERVER = -1; + private static final int NO_REGION = -1; private static final boolean DEBUG_MAJOR = false; private static final boolean DEBUG_MINOR = false; @@ -83,19 +84,26 @@ private BalanceAction generateAction( ) { int targetRegionsPerServer = Ints.checkedCast( (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + List regionCounts = Arrays.stream(cluster.regionsPerServer).map(regions -> regions.length) + .collect(Collectors.toList()); + + List> bigServers = + cellGroupSizesPerServer.stream().filter(e -> e.keySet().size() > 7) + .collect(Collectors.toList()); + Map collective = new HashMap<>(); + bigServers.forEach(e -> e.forEach((k, v) -> collective.merge(k, v, Integer::sum))); List underloadedServers = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer).boxed() - .distinct() - .collect(Collectors.toList()); - List overloadedServers = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length > targetRegionsPerServer).boxed() - .distinct() + .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1).boxed() .collect(Collectors.toList()); // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within plus/minus 1 of the target - if (!underloadedServers.isEmpty() && !overloadedServers.isEmpty()) { - return moveRegionFromOverloadedToUnderloaded(overloadedServers, underloadedServers, cellGroupSizesPerServer, cluster); + if (!underloadedServers.isEmpty()) { + List serversThatCanLoseOneRegion = IntStream.range(0, cluster.numServers) + .filter(server -> cluster.regionsPerServer[server].length >= targetRegionsPerServer).boxed() + .collect(Collectors.toList()); + + return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, cellGroupSizesPerServer, cluster); } // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps @@ -116,6 +124,12 @@ private BalanceAction generateAction( return BalanceAction.NULL_ACTION; } + BalanceAction swapAttempt = giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); + + if (swapAttempt != BalanceAction.NULL_ACTION) { + return swapAttempt; + } + return giveAwaySomeRegionToImprove( cellOnServer, cellGroupSizesPerServer, @@ -124,336 +138,177 @@ private BalanceAction generateAction( ); } - private BalanceAction moveRegionFromOverloadedToUnderloaded( - List overloadedServers, - List underloadedServers, + private Pair pickSecondMostFrequentCellOnAnyUnsaturatedServer( List> cellGroupSizesPerServer, + int[] cellCounts, BalancerClusterState cluster ) { - int underloadedServer = - underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); - int overloadedServer = overloadedServers.get(ThreadLocalRandom.current().nextInt(overloadedServers.size())); - short cellToMove = pickLeastFrequentCell(cellGroupSizesPerServer.get(overloadedServer)); - Multimap cellsByRegionForOverloadedServer = - computeCellsByRegion(cluster.regionsPerServer[overloadedServer], cluster.regions); - - for (int overloadedServerCandidate : overloadedServers) { - short lightest = pickLeastFrequentCell(cellGroupSizesPerServer.get(overloadedServerCandidate)); - if (cellGroupSizesPerServer.get(overloadedServerCandidate).get(lightest) == 1) { - overloadedServer = overloadedServerCandidate; - cellToMove = lightest; - cellsByRegionForOverloadedServer = computeCellsByRegion(cluster.regionsPerServer[overloadedServer], cluster.regions); - break; - } - } + return IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> cellGroupSizesPerServer.get(server).size() > 1) + .map(server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) + .sorted(Comparator.comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) + .findFirst() + .orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); + } - return getAction( - overloadedServer, - pickRegionForCell(cellsByRegionForOverloadedServer, cellToMove), underloadedServer, - -1 - ); + private short get2ndMostFrequentCell(Map countOfCells) { + short mostFrequent = pickMostFrequentCell(countOfCells); + return countOfCells.keySet().stream() + .filter(cell -> cell != mostFrequent) + .max(Comparator.comparing(countOfCells::get)) + .get(); } - private BalanceAction giveAwaySomeRegionToImprove( + private BalanceAction giveAwayRegionViaSwap( Pair cellOnServer, List> cellGroupSizesPerServer, - int[] cellCounts, BalancerClusterState cluster ) { + short sourceCell = cellOnServer.getFirst(); + int sourceServer = cellOnServer.getSecond(); - short cellToRemove = cellOnServer.getFirst(); - int serverToYieldCell = cellOnServer.getSecond(); + Map sourceCellCounts = cellGroupSizesPerServer.get(sourceServer); + Set sourceCells = sourceCellCounts.keySet(); - Map cellCountsOnServerToYieldCell = cellGroupSizesPerServer.get(serverToYieldCell); - Set cellsOnServerToYieldCell = cellCountsOnServerToYieldCell.keySet(); + Optional otherServerWithSharedCellAndMostOfTheCellToGiveAway = + IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> server != sourceServer) + .filter(server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell)) + .filter(server -> Sets.intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) + .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(sourceCell))); - int targetRegionsPerServer = Ints.checkedCast( - (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - double allowableImbalanceInRegions = 1.05; + if (!otherServerWithSharedCellAndMostOfTheCellToGiveAway.isPresent()) { + return BalanceAction.NULL_ACTION; + } - Set partialCandidatesWithInstanceOfOurCell = new HashSet<>(); - Set fullCandidatesWithInstanceOfOurCell = new HashSet<>(); - List candidatesWithFewerTotalCells = new ArrayList<>(); - for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { - if (server == serverToYieldCell) { - continue; - } + int targetServer = otherServerWithSharedCellAndMostOfTheCellToGiveAway.get(); + Map targetCells = cellGroupSizesPerServer.get(targetServer); - Map cellsOnServer = cellGroupSizesPerServer.get(server); + short targetCell = targetCells.keySet().stream().filter(cell -> cell != sourceCell) + .filter(sourceCells::contains).findAny().get(); - Set cellsOnServerAndOthers = - cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) - .collect(Collectors.toSet()); + return swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); + } - double maxAllowedRegionCountPerServer = allowableImbalanceInRegions * targetRegionsPerServer; - if (cellsOnServer.keySet().size() < cellCountsOnServerToYieldCell.keySet().size() && - cluster.regionsPerServer[server].length <= Math.ceil(maxAllowedRegionCountPerServer)) { - candidatesWithFewerTotalCells.add(server); + private BalanceAction moveRegionFromOverloadedToUnderloaded( + List overloadedServers, + List underloadedServers, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + List overloadedServersMostToLeastCells = overloadedServers.stream().sorted( + Comparator.comparing(server -> -1 * cellGroupSizesPerServer.get(server).keySet().size())) + .collect(Collectors.toList()); + // if there's a server w/ excess that has a single instance of a cell that we already have, + // prioritize that first (easy +2) + for (int source : overloadedServersMostToLeastCells) { + for (int target : underloadedServers) { + Map cellsOnSource = cellGroupSizesPerServer.get(source); + Map cellsOnTarget = cellGroupSizesPerServer.get(target); + + List singletonCellsOnSourceWeCanMoveToTarget = + cellsOnSource.keySet().stream().filter(cell -> cellsOnSource.get(cell) == 1) + .filter(cellsOnTarget::containsKey).collect(Collectors.toList()); + + if (!singletonCellsOnSourceWeCanMoveToTarget.isEmpty()) { + Multimap cellsByRegionOnSource = + computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); + short cellToMove = singletonCellsOnSourceWeCanMoveToTarget.get( + ThreadLocalRandom.current().nextInt(singletonCellsOnSourceWeCanMoveToTarget.size())); + + return getAction( + source, + pickRegionForCell(cellsByRegionOnSource, cellToMove), + target, + -1 + ); + } } + } - // if that server is perfectly isolated, don't allow that to be broken even to fix another - if (cellsOnServer.keySet().size() == 1 && !cellsOnServer.containsKey(cellToRemove)) { - continue; - } + int target = + underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); - if (cluster.regionsPerServer[server].length >= Math.ceil(maxAllowedRegionCountPerServer)) { - continue; + // if there's a server w/ excess that has a singleton cell we don't have but only one instance, accept it + // (0, neutral) + for (int source : overloadedServersMostToLeastCells) { + Map cellCountsOnServer = cellGroupSizesPerServer.get(source); + short leastFrequentCell = pickLeastFrequentCell(cellCountsOnServer); + if (cellCountsOnServer.get(leastFrequentCell) == 1) { + return getAction( + source, + pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), leastFrequentCell), + target, + NO_REGION + ); } + } - if (cellsOnServer.containsKey(cellToRemove)) { - if (cellsOnServer.keySet().size() == 1 - || cellsOnServerAndOthers.size() == 1) { - fullCandidatesWithInstanceOfOurCell.add(server); - } else { - partialCandidatesWithInstanceOfOurCell.add(server); - } - - Sets.SetView cellsInCommon = - Sets.intersection(cellsOnServerToYieldCell, cellsOnServer.keySet()); - - if (cellsInCommon.size() > 1) { - short commonCellToSwap = - cellsInCommon.stream().filter(cell -> cell != cellToRemove).findAny().get(); - SwapRegionsAction action = - swap(server, cellToRemove, serverToYieldCell, commonCellToSwap, cluster); - if (LOG.isDebugEnabled() || DEBUG_MINOR) { - int sourceOldTotal = cellsOnServerToYieldCell.size(); - int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); - int targetOldTotal = cellsOnServer.size(); - int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(commonCellToSwap) == 1 ? 1 : 0); - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; + // ok, we give up. just pick a random region from the least loaded cell of some instance and call it a day + // this will be (-1) but allows balancing to continue + int source = overloadedServersMostToLeastCells.get( + ThreadLocalRandom.current().nextInt(Math.min(overloadedServersMostToLeastCells.size(), 5))); + short cellToMove = pickLeastFrequentCell(cellGroupSizesPerServer.get(source)); - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; + Multimap cellsByRegionForSource = + computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); + return getAction(source, + pickRegionForCell(cellsByRegionForSource, cellToMove), target, + NO_REGION + ); + } - System.out.printf( - "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - commonCellToSwap, - cellCountsOnServerToYieldCell.get(commonCellToSwap), - commonCellToSwap, - cellsOnServer.get(commonCellToSwap), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " - + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - commonCellToSwap, - cellCountsOnServerToYieldCell.get(commonCellToSwap), - commonCellToSwap, - cellsOnServer.get(commonCellToSwap), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); - } - return action; - } - } - } + private BalanceAction giveAwaySomeRegionToImprove( + Pair cellOnServer, + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { - int serverToSend = -1; - List candidates = new ArrayList<>(partialCandidatesWithInstanceOfOurCell); + short cell = cellOnServer.getFirst(); + int sourceServer = cellOnServer.getSecond(); - Optional serverWeCanImprove = candidates.stream() - .filter( - server -> { - Map countsForServer = cellGroupSizesPerServer.get(server); - return countsForServer.keySet().stream() - .anyMatch(cell -> cell != cellToRemove && countsForServer.get(cell) == 1); - } - ).findFirst(); - - if (partialCandidatesWithInstanceOfOurCell.isEmpty() && fullCandidatesWithInstanceOfOurCell.isEmpty()) { - // nobody else has a copy of this cell that we can offload, we'll need to increase another server's load to reduce ours - serverToSend = candidatesWithFewerTotalCells.get(ThreadLocalRandom.current().nextInt(candidatesWithFewerTotalCells.size())); - } else if (serverWeCanImprove.isPresent()) { - int serverToSwap = serverWeCanImprove.get(); - Map cellsOnServer = cellGroupSizesPerServer.get(serverToSwap); - short cellToTakeFromSwap = cellsOnServer.keySet().stream() - .filter(cell -> cell != cellToRemove && cellsOnServer.get(cell) == 1) - .findFirst() - .get(); - - SwapRegionsAction action = - swap(serverToSwap, cellToRemove, serverToYieldCell, cellToTakeFromSwap, cluster); - - if (LOG.isDebugEnabled() || DEBUG_MINOR) { - int sourceOldTotal = cellsOnServerToYieldCell.size(); - int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); - int targetOldTotal = cellsOnServer.size(); - int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToRemove) == 1 ? 1 : 0); - - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; - - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - - System.out.printf( - "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - cellToTakeFromSwap, - cellCountsOnServerToYieldCell.get(cellToTakeFromSwap), - cellToTakeFromSwap, - cellsOnServer.get(cellToTakeFromSwap), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " - + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - cellToTakeFromSwap, - cellCountsOnServerToYieldCell.get(cellToTakeFromSwap), - cellToTakeFromSwap, - cellsOnServer.get(cellToTakeFromSwap), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); - } - return action; - } else if (!fullCandidatesWithInstanceOfOurCell.isEmpty()) { - serverToSend = fullCandidatesWithInstanceOfOurCell.stream() - .findAny() - .get(); - } else { - candidates.sort(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cellToRemove))); + Map cellCountsOnSource = cellGroupSizesPerServer.get(sourceServer); + Set cellsOnSource = cellCountsOnSource.keySet(); - serverToSend = candidates.get(Math.max(0, candidates.size() - 1)); - int numInstancesOfCellOnServerToSend = cellGroupSizesPerServer.get(serverToSend).get(cellToRemove); - double reservoirRandom = ThreadLocalRandom.current().nextDouble(); - for (int i = candidates.size() - 2; i >= 0; i--) { - int nextCandidate = candidates.get(i); - int numInstancesOfCellOnNextCandidate = cellGroupSizesPerServer.get(nextCandidate).get(cellToRemove); + Optional otherServerWithThisCell = pickOtherServerWithThisCellToGiveItTo( + cell, sourceServer, cellGroupSizesPerServer, cluster + ); - if (numInstancesOfCellOnNextCandidate < numInstancesOfCellOnServerToSend) { - break; - } + int targetServer = NO_SERVER; - double nextRandom = ThreadLocalRandom.current().nextDouble(); - if (nextRandom > reservoirRandom) { - reservoirRandom = nextRandom; - serverToSend = nextCandidate; - numInstancesOfCellOnServerToSend = numInstancesOfCellOnNextCandidate; - } - } + if (otherServerWithThisCell.isPresent()) { + targetServer = otherServerWithThisCell.get(); + } else { + Optional lowerLoadedServer = + pickOtherLowerLoadedServerToGiveCell(sourceServer, cellGroupSizesPerServer, cluster); - short cellToTake = pickRandomMinorityCell(cellGroupSizesPerServer.get(serverToSend)); - - Map cellsOnServer = cellGroupSizesPerServer.get(serverToSend); - SwapRegionsAction action = - swap(serverToSend, cellToRemove, serverToYieldCell, cellToTake, cluster); - - if (LOG.isDebugEnabled() || DEBUG_MINOR) { - int sourceOldTotal = cellsOnServerToYieldCell.size(); - int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); - int targetOldTotal = cellsOnServer.size(); - int targetNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToRemove) == 1 ? 1 : 0); - - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; - - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - - System.out.printf( - "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - cellToTake, - cellCountsOnServerToYieldCell.get(cellToTake), - cellToTake, - cellsOnServer.get(cellToTake), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " - + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), - cellToTake, - cellCountsOnServerToYieldCell.get(cellToTake), - cellToTake, - cellsOnServer.get(cellToTake), - cellToRemove, - cellsOnServer.get(cellToRemove), - descrOfQuality - ); + if (lowerLoadedServer.isPresent()) { + targetServer = lowerLoadedServer.get(); } - return action; } - Multimap cellsByRegion = - computeCellsByRegion(cluster.regionsPerServer[serverToYieldCell], cluster.regions); + if (targetServer == NO_SERVER) { + return BalanceAction.NULL_ACTION; + } MoveRegionAction action = (MoveRegionAction) getAction( - serverToYieldCell, - pickRegionForCell(cellsByRegion, cellToRemove), - serverToSend, - -1 + sourceServer, + pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), + targetServer, + NO_REGION ); - Map cellsOnTarget = cellGroupSizesPerServer.get(serverToSend); - if (LOG.isDebugEnabled() || DEBUG_MINOR) { - int sourceOldTotal = cellsOnServerToYieldCell.size(); - int sourceNewTotal = cellsOnServerToYieldCell.size() - (cellCountsOnServerToYieldCell.get(cellToRemove) == 1 ? 1 : 0); + Map cellsOnTarget = cellGroupSizesPerServer.get(targetServer); + int sourceOldTotal = cellsOnSource.size(); + int sourceNewTotal = cellsOnSource.size() - (cellCountsOnSource.get(cell) == 1 ? 1 : 0); int targetOldTotal = cellsOnTarget.size(); - int targetNewTotal = cellsOnTarget.size() + (cellsOnTarget.get(cellToRemove) == 0 ? 1 : 0); + int targetNewTotal = cellsOnTarget.size() - (cellsOnTarget.get(cell) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; @@ -467,28 +322,22 @@ private BalanceAction giveAwaySomeRegionToImprove( "BAD"; System.out.printf( - "Moving s%d.r%d c[%d / %d] to s%d. SOURCE is %d -> %d, TARGET is %d -> %d. Change is %s\n", + "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", action.getFromServer(), action.getRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), action.getToServer(), - sourceOldTotal, - sourceNewTotal, - targetOldTotal, - targetNewTotal, + cell, + cellCountsOnSource.get(cell), + cellsOnTarget.get(cell), descrOfQuality ); - LOG.debug("Moving s{}.r{} c[{} / {}] to s{}. SOURCE is {} -> {}, TARGET is {} -> {}. Change is {}", + LOG.debug("Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", action.getFromServer(), action.getRegion(), - cellToRemove, - cellCountsOnServerToYieldCell.get(cellToRemove), action.getToServer(), - sourceOldTotal, - sourceNewTotal, - targetOldTotal, - targetNewTotal, + cell, + cellCountsOnSource.get(cell), + cellsOnTarget.get(cell), descrOfQuality ); } @@ -496,11 +345,50 @@ private BalanceAction giveAwaySomeRegionToImprove( return action; } - private short pickRandomMinorityCell(Map cellCounts) { - short preservedCell = pickMostFrequentCell(cellCounts); - List candidates = cellCounts.keySet().stream().filter(cell -> cell != preservedCell) - .collect(Collectors.toList()); - return candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); + private Optional pickOtherLowerLoadedServerToGiveCell( + int sourceServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + List serversByCellCountAsc = + IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) + .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) + .collect(Collectors.toList()); + + int serverToPick = NO_SERVER; + int lowestCountSoFar = Integer.MAX_VALUE; + double reservoirRandom = -1; + + for (int server : serversByCellCountAsc) { + int cellCount = cellGroupSizesPerServer.get(server).keySet().size(); + if (cellCount < lowestCountSoFar) { + serverToPick = server; + lowestCountSoFar = cellCount; + reservoirRandom = ThreadLocalRandom.current().nextDouble(); + } else if (cellCount == lowestCountSoFar) { + double serverRandom = ThreadLocalRandom.current().nextDouble(); + if (serverRandom > reservoirRandom) { + serverToPick = server; + reservoirRandom = serverRandom; + } + } + } + + return Optional.of(serverToPick).filter(server -> server != NO_SERVER); + } + + private Optional pickOtherServerWithThisCellToGiveItTo( + short cell, + int sourceServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + return IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> server != sourceServer) + .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) + .filter(server -> cluster.regionsPerServer[server].length <= Math.ceil((double) cluster.numRegions / cluster.numServers)) + .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cell))); } private short pickLeastFrequentCell( @@ -556,62 +444,47 @@ private short pickMostFrequentCell( private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, List> cellGroupSizesPerServer, BalancerClusterState cluster) { - short cellToImprove = cellOnServer.getFirst(); - int serverToImprove = cellOnServer.getSecond(); + short sourceCell = cellOnServer.getFirst(); + int targetServer = cellOnServer.getSecond(); - if (serverToImprove == -1) { - if (LOG.isTraceEnabled()) { - LOG.trace("No server available to improve"); - } - return BalanceAction.NULL_ACTION; - } - - Map cellCountsOnServerToImprove = cellGroupSizesPerServer.get(serverToImprove); - Set cellsOnServerToImprove = cellCountsOnServerToImprove.keySet(); - - if (serverToImprove < 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("No server with cells found"); - } - return BalanceAction.NULL_ACTION; - } + Map cellCountsOnTargetServer = cellGroupSizesPerServer.get(targetServer); + Set cellsOnTargetServer = cellCountsOnTargetServer.keySet(); - if (cluster.regionsPerServer[serverToImprove].length == 0) { + if (cluster.regionsPerServer[targetServer].length == 0) { if (LOG.isTraceEnabled()) { - LOG.trace("{} has no regions", serverToImprove); + LOG.trace("{} has no regions", targetServer); } return BalanceAction.NULL_ACTION; } - Set candidateSet = new HashSet<>(); - for (int server = 0; server < cellGroupSizesPerServer.size(); server++) { - if (server == serverToImprove) { + Set sourceCandidateSet = new HashSet<>(); + for (int sourceServerCandidate = 0; sourceServerCandidate < cellGroupSizesPerServer.size(); sourceServerCandidate++) { + if (sourceServerCandidate == targetServer) { continue; } - Map cellsOnServer = cellGroupSizesPerServer.get(server); + Map cellsOnSourceCandidate = cellGroupSizesPerServer.get(sourceServerCandidate); // if that server is perfectly isolated, don't allow that to be broken even to fix another - if (cellsOnServer.keySet().size() == 1) { + if (cellsOnSourceCandidate.keySet().size() == 1) { continue; } - if (cellsOnServer.containsKey(cellToImprove)) { - candidateSet.add(server); + if (cellsOnSourceCandidate.containsKey(sourceCell)) { + sourceCandidateSet.add(sourceServerCandidate); Sets.SetView cellsInCommon = - Sets.intersection(cellsOnServerToImprove, cellsOnServer.keySet()); + Sets.intersection(cellsOnTargetServer, cellsOnSourceCandidate.keySet()); if (cellsInCommon.size() > 1) { short commonCellToSwap = - cellsInCommon.stream().filter(cell -> cell != cellToImprove).findAny().get(); - SwapRegionsAction action = - swap(serverToImprove, cellToImprove, server, commonCellToSwap, cluster); + cellsInCommon.stream().filter(cell -> cell != sourceCell).findAny().get(); + SwapRegionsAction action = swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { - int sourceOldTotal = cellsOnServer.size(); - int sourceNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToImprove) == 1 ? 1 : 0); - int targetOldTotal = cellsOnServerToImprove.size(); - int targetNewTotal = cellCountsOnServerToImprove.size() - (cellCountsOnServerToImprove.get(commonCellToSwap) == 1 ? 1 : 0); + int sourceOldTotal = cellsOnSourceCandidate.size(); + int sourceNewTotal = cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); + int targetOldTotal = cellsOnTargetServer.size(); + int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; @@ -632,13 +505,13 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, action.getToServer(), action.getToRegion(), commonCellToSwap, - cellCountsOnServerToImprove.get(commonCellToSwap), - cellToImprove, - cellCountsOnServerToImprove.get(cellToImprove), - cellToImprove, - cellsOnServer.get(cellToImprove), + cellCountsOnTargetServer.get(commonCellToSwap), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSourceCandidate.get(sourceCell), commonCellToSwap, - cellsOnServer.get(commonCellToSwap), + cellsOnSourceCandidate.get(commonCellToSwap), descrOfQuality ); LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " @@ -648,13 +521,13 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, action.getToServer(), action.getToRegion(), commonCellToSwap, - cellCountsOnServerToImprove.get(commonCellToSwap), - cellToImprove, - cellCountsOnServerToImprove.get(cellToImprove), - cellToImprove, - cellsOnServer.get(cellToImprove), + cellCountsOnTargetServer.get(commonCellToSwap), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSourceCandidate.get(sourceCell), commonCellToSwap, - cellsOnServer.get(commonCellToSwap), + cellsOnSourceCandidate.get(commonCellToSwap), descrOfQuality ); } @@ -663,29 +536,28 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, } } - List candidates = new ArrayList<>(candidateSet); + List candidates = new ArrayList<>(sourceCandidateSet); if (candidates.isEmpty()) { // this means we've reached the end of the road for this particular cell return BalanceAction.NULL_ACTION; } - int serverToSwap = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); - short cellToOffer = cellsOnServerToImprove.stream() - .filter(cell -> cell != cellToImprove) - .collect(Collectors.toList()) - .get(ThreadLocalRandom.current().nextInt(cellsOnServerToImprove.size() - 1)); + int sourceServer = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); + Map cellsOnSource = cellGroupSizesPerServer.get(sourceServer); + short targetCell = cellsOnTargetServer.stream() + .filter(cell -> cell != sourceCell) + .sorted(Comparator.comparing(cellCountsOnTargetServer::get)) + .findFirst() + .get(); - Map cellsOnServer = cellGroupSizesPerServer.get(serverToSwap); - - SwapRegionsAction action = - swap(serverToImprove, cellToImprove, serverToSwap, cellToOffer, cluster); + SwapRegionsAction action = swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { - int sourceOldTotal = cellsOnServer.size(); - int sourceNewTotal = cellsOnServer.size() - (cellsOnServer.get(cellToImprove) == 1 ? 1 : 0); - int targetOldTotal = cellsOnServerToImprove.size(); - int targetNewTotal = cellCountsOnServerToImprove.size() - (cellCountsOnServerToImprove.get(cellToOffer) == 1 ? 1 : 0); + int sourceOldTotal = cellsOnSource.size(); + int sourceNewTotal = cellsOnSource.size() - (cellsOnSource.get(sourceCell) == 1 ? 1 : 0); + int targetOldTotal = cellsOnTargetServer.size(); + int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; @@ -705,14 +577,14 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, action.getFromRegion(), action.getToServer(), action.getToRegion(), - cellToOffer, - cellCountsOnServerToImprove.get(cellToOffer), - cellToImprove, - cellCountsOnServerToImprove.get(cellToImprove), - cellToImprove, - cellsOnServer.get(cellToImprove), - cellToOffer, - cellsOnServer.get(cellToOffer), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), descrOfQuality ); LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " @@ -721,14 +593,14 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, action.getFromRegion(), action.getToServer(), action.getToRegion(), - cellToOffer, - cellCountsOnServerToImprove.get(cellToOffer), - cellToImprove, - cellCountsOnServerToImprove.get(cellToImprove), - cellToImprove, - cellsOnServer.get(cellToImprove), - cellToOffer, - cellsOnServer.get(cellToOffer), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), descrOfQuality ); } @@ -736,6 +608,21 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, return action; } + private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, BalancerClusterState cluster) { + return (SwapRegionsAction) getAction( + fromServer, + resolveCellToRegion(cluster, fromServer, fromCell), + toServer, + resolveCellToRegion(cluster, toServer, toCell) + ); + } + + private int resolveCellToRegion(BalancerClusterState cluster, int server, short cell) { + Multimap cellsByRegion = + computeCellsByRegion(cluster.regionsPerServer[server], cluster.regions); + return pickRegionForCell(cellsByRegion, cell); + } + private SwapRegionsAction swap( int receivingServer, short cellToGiveToReceivingServer, @@ -796,21 +683,21 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( continue; } - List> cellsByFrequencyDesc = + List> cellsByFrequencyAsc = cellsOnServer.entrySet().stream().sorted(Map.Entry.comparingByValue()) .collect(Collectors.toList()); - if (cellsByFrequencyDesc.isEmpty()) { + if (cellsByFrequencyAsc.isEmpty()) { continue; } - int probe = cellsByFrequencyDesc.size() - 1; + int probe = cellsByFrequencyAsc.size() - 1; short mostFrequentCellTemp = -1; int mostFrequentCellCountTemp = -1; do { - Map.Entry entry = cellsByFrequencyDesc.get(probe); + Map.Entry entry = cellsByFrequencyAsc.get(probe); mostFrequentCellTemp = entry.getKey(); mostFrequentCellCountTemp = entry.getValue(); probe--; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java deleted file mode 100644 index b23de8979a4b..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.apache.hadoop.hbase.master.balancer; - -import java.io.IOException; -import java.net.URL; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import com.google.common.math.Quantiles; -import org.apache.hadoop.conf.Configuration; -import org.apache.hbase.thirdparty.com.google.common.math.Stats; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.base.Charsets; -import org.apache.hbase.thirdparty.com.google.common.io.Resources; - -public class HubSpotScratchFile { - private static final Logger LOG = LoggerFactory.getLogger(HubSpotScratchFile.class); - - public static void main(String[] args) throws IOException { - String file = Resources.readLines(new URL("file:///Users/eszabowexler/Downloads/cluster.json"), Charsets.UTF_8).stream() - .collect(Collectors.joining("\n")); - BalancerClusterState state = HubSpotCellCostFunction.OBJECT_MAPPER.fromJson(file, BalancerClusterState.class); - - - HubSpotCellCostFunction func = - new HubSpotCellCostFunction(new Configuration()); - HubSpotCellBasedCandidateGenerator generator = new HubSpotCellBasedCandidateGenerator(); - - func.prepare(state); - double cost = func.cost(); - Set movedRegions = new HashSet<>(); - Set fromServers = new HashSet<>(); - Set toServers = new HashSet<>(); - Set repeatMoveRegions = new HashSet<>(); - - double lastCost = cost; - int printFrequency = 500; - - for (int step = 0; step < 200_000; step++) { - if (step % printFrequency == 0) { - double costDelta = cost - lastCost; - lastCost = cost; - double costPerStep = costDelta / printFrequency; - - List size = HubSpotCellBasedCandidateGenerator.computeCellsPerRs(state); - Map quantiles = - Quantiles.scale(100).indexes(10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(size); - - System.out.printf("Step %d --> %.2f - %d regions moved (%d more than once), %d sources, %d targets. Moving %.2f per step, cumulative %.2f drop\t\t\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t\n", - step, cost, movedRegions.size(), repeatMoveRegions.size(), fromServers.size(), toServers.size(), costPerStep, costDelta, - quantiles.get(10), quantiles.get(20),quantiles.get(30),quantiles.get(40),quantiles.get(50),quantiles.get(60),quantiles.get(70),quantiles.get(80),quantiles.get(90),quantiles.get(100)); - } - BalanceAction action = generator.generate(state); - if (action instanceof SwapRegionsAction) { - SwapRegionsAction swapRegionsAction = (SwapRegionsAction) action; - - if (movedRegions.contains(swapRegionsAction.getFromRegion())) { - repeatMoveRegions.add(swapRegionsAction.getFromServer()); - } - if (movedRegions.contains(swapRegionsAction.getToRegion())) { - repeatMoveRegions.add(swapRegionsAction.getToRegion()); - } - - movedRegions.add(swapRegionsAction.getFromRegion()); - movedRegions.add(swapRegionsAction.getToRegion()); - fromServers.add(swapRegionsAction.getFromServer()); - toServers.add(swapRegionsAction.getToServer()); - } - - state.doAction(action); - func.postAction(action); - cost = func.cost(); - } - - LOG.info("{}", state); - } -} From a986195a522a871bac3a60240405bc99a73e0cb2 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 21 Nov 2024 09:00:49 -0500 Subject: [PATCH 078/126] Disable automatic logging for local runs --- .../HubSpotCellBasedCandidateGenerator.java | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 1e79b14f3da4..8f3e5f575913 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -321,16 +321,16 @@ private BalanceAction giveAwaySomeRegionToImprove( (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : "BAD"; - System.out.printf( - "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", - action.getFromServer(), - action.getRegion(), - action.getToServer(), - cell, - cellCountsOnSource.get(cell), - cellsOnTarget.get(cell), - descrOfQuality - ); +// System.out.printf( +// "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", +// action.getFromServer(), +// action.getRegion(), +// action.getToServer(), +// cell, +// cellCountsOnSource.get(cell), +// cellsOnTarget.get(cell), +// descrOfQuality +// ); LOG.debug("Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", action.getFromServer(), action.getRegion(), @@ -497,23 +497,23 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : "BAD"; - System.out.printf( - "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - commonCellToSwap, - cellCountsOnTargetServer.get(commonCellToSwap), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSourceCandidate.get(sourceCell), - commonCellToSwap, - cellsOnSourceCandidate.get(commonCellToSwap), - descrOfQuality - ); +// System.out.printf( +// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " +// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", +// action.getFromServer(), +// action.getFromRegion(), +// action.getToServer(), +// action.getToRegion(), +// commonCellToSwap, +// cellCountsOnTargetServer.get(commonCellToSwap), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellsOnSourceCandidate.get(sourceCell), +// commonCellToSwap, +// cellsOnSourceCandidate.get(commonCellToSwap), +// descrOfQuality +// ); LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", action.getFromServer(), @@ -570,23 +570,23 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : "BAD"; - System.out.printf( - "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - descrOfQuality - ); +// System.out.printf( +// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " +// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", +// action.getFromServer(), +// action.getFromRegion(), +// action.getToServer(), +// action.getToRegion(), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellsOnSource.get(sourceCell), +// sourceCell, +// cellsOnSource.get(sourceCell), +// descrOfQuality +// ); LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", action.getFromServer(), From 258caf912d29e78160b9af49c0542e97a80a701a Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 21 Nov 2024 09:21:39 -0500 Subject: [PATCH 079/126] Fix test --- .../hbase/master/balancer/TestHubSpotCellCostFunction.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index 73a5a3a43a2e..fa9f358883ec 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -115,7 +115,7 @@ public void testCostBalanced() { buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 1 }, { 2 }, { 3 } }, + new int[] { 0 , 1 , 2 , 3 }, new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, ALL_REGIONS_SIZE_1_MB ); @@ -136,7 +136,7 @@ public void testCostImbalanced() { buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[][] { { 0 }, { 0 }, { 0 }, { 0 } }, + new int[] { 0 , 0 , 0 , 0 }, new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, ALL_REGIONS_SIZE_1_MB); assertTrue(cost > 0); From e95ef44dabbf97c14ff4d8425fbd350e06d14f94 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 26 Nov 2024 15:46:44 -0500 Subject: [PATCH 080/126] Stash work --- .../HubSpotCellBasedCandidateGenerator.java | 326 ++++++++++++++---- .../master/balancer/HubSpotScratchFile.java | 146 ++++++++ 2 files changed, 409 insertions(+), 63 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 8f3e5f575913..206ad2d7fa31 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -106,6 +107,13 @@ private BalanceAction generateAction( return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, cellGroupSizesPerServer, cluster); } + // Step 3: balanced regions, so let's take cells spread over many servers and collect them to one + BalanceAction action = moveCellOnMultipleServersFromLowLoadedServerToBetterOne(cellGroupSizesPerServer, cellCounts, cluster); + + if (action.getType() != BalanceAction.Type.NULL) { + return action; + } + // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps Pair cellOnServer = pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); @@ -113,7 +121,7 @@ private BalanceAction generateAction( return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); } - // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces + // Step 4: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can either swap // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that subsequent rounds // will use step (1) to repair the imbalance. @@ -138,6 +146,132 @@ private BalanceAction generateAction( ); } + private BalanceAction moveCellOnMultipleServersFromLowLoadedServerToBetterOne( + List> cellGroupSizesPerServer, + int[] numRegiosnPerCell, + BalancerClusterState cluster + ) { + // if there are 2 servers that aren't "full" with this cell, we can move all regions for it off + // the lower load one, onto the higher load one + List candidateCells = + IntStream.range(0, HubSpotCellCostFunction.MAX_CELL_COUNT).mapToObj(cell -> (short) cell) + .filter(cell -> { + long numSwapCandidates = IntStream.range(0, cluster.numServers).boxed().filter( + server -> isCellOnServerGoodSwapCandidate(server, cell, numRegiosnPerCell, + cellGroupSizesPerServer)).count(); + // if there are 2 servers that aren't "full" with this cell, we can move all regions for it off + // the lower load one, onto the higher load one + return numSwapCandidates > 1; + }) + .collect(Collectors.toList()); + + + List lightestCandidates = new ArrayList<>(); + int lowestCountSoFar = Integer.MAX_VALUE; + for (short cell : candidateCells) { + int lowestInstanceCountForCell = IntStream.range(0, cluster.numServers) + .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) + .map(server -> cellGroupSizesPerServer.get(server).get(cell)).min().getAsInt(); + + if (lowestInstanceCountForCell < lowestCountSoFar) { + lightestCandidates = new ArrayList<>(); + lightestCandidates.add(cell); + lowestCountSoFar = lowestInstanceCountForCell; + } else if (lowestInstanceCountForCell == lowestCountSoFar) { + lightestCandidates.add(cell); + } + } + + Collections.shuffle(lightestCandidates); + + if (lightestCandidates.isEmpty()) { + return BalanceAction.NULL_ACTION; + } + + for (int i = 0; i < lightestCandidates.size(); i++) { + short sourceCell = lightestCandidates.get(i); + List sourceCandidates = IntStream.range(0, cluster.numServers).boxed().filter( + server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell) + && cellGroupSizesPerServer.get(server).keySet().size() > 1 + && cellGroupSizesPerServer.get(server).get(sourceCell) != numRegiosnPerCell[sourceCell]) + .collect(Collectors.toList()); + + if (sourceCandidates.isEmpty()) { + continue; + } + + int sourceServer = sourceCandidates.stream() + .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).size())) + .get(); + + int sourceCellCount = cellGroupSizesPerServer.get(sourceServer).get(sourceCell); + + Optional targetServerMaybe = IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> server != sourceServer) + .filter(server -> isCellOnServerGoodSwapCandidate(server, sourceCell, numRegiosnPerCell, cellGroupSizesPerServer)) + .filter(server -> cellGroupSizesPerServer.get(server).get(sourceCell) >= sourceCellCount) + .filter(server -> cellGroupSizesPerServer.get(server).size() > 2) + .filter(server -> cellGroupSizesPerServer.get(server).size() > sourceCellCount) + .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).entrySet().stream().filter(entry -> entry.getValue() == 1).count())); + + if (!targetServerMaybe.isPresent()) { + continue; + } + + int targetServer = targetServerMaybe.get(); + + Optional targetCellMaybe = + cellGroupSizesPerServer.get(targetServer).keySet().stream().filter(cell -> cell != sourceCell) + .min(Comparator.comparing(cellGroupSizesPerServer.get(targetServer)::get)); + + if (!targetCellMaybe.isPresent()) { + continue; + } + + short targetCell = targetCellMaybe.get(); + + int change = + (cellGroupSizesPerServer.get(sourceServer).getOrDefault(sourceCell, 0) == 1 ? -1 : 0) + + (cellGroupSizesPerServer.get(targetServer).getOrDefault(sourceCell, 0) == 0 ? 1 : 0) + + (cellGroupSizesPerServer.get(sourceServer).getOrDefault(targetCell, 0) == 0 ? 1 : 0) + + (cellGroupSizesPerServer.get(targetServer).getOrDefault(targetCell, 0) == 1 ? -1 : 0) + ; + + if (change >= 0) { + continue; + } + + return swapCells("sparse cells", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); + } + + return BalanceAction.NULL_ACTION; + } + + private boolean isCellOnServerGoodSwapCandidate( + int server, + short cell, + int[] numRegionsPerCell, + List> cellGroupSizesPerServer + ) { + Map cellCounts = cellGroupSizesPerServer.get(server); + if (!cellCounts.containsKey(cell)) { + return false; + } + + int numOtherCellsWithRepresentationElsewhere = Ints.checkedCast( + cellCounts.keySet().stream() + .filter(testCell -> testCell != cell) + .filter(testCell -> cellCounts.get(testCell) < numRegionsPerCell[testCell]) + .count()); + + if (numOtherCellsWithRepresentationElsewhere == 0) { + return false; + } + + return true; + } + private Pair pickSecondMostFrequentCellOnAnyUnsaturatedServer( List> cellGroupSizesPerServer, int[] cellCounts, @@ -189,7 +323,7 @@ private BalanceAction giveAwayRegionViaSwap( short targetCell = targetCells.keySet().stream().filter(cell -> cell != sourceCell) .filter(sourceCells::contains).findAny().get(); - return swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); + return swapCells("improve least loaded", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); } private BalanceAction moveRegionFromOverloadedToUnderloaded( @@ -202,63 +336,79 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded( Comparator.comparing(server -> -1 * cellGroupSizesPerServer.get(server).keySet().size())) .collect(Collectors.toList()); // if there's a server w/ excess that has a single instance of a cell that we already have, - // prioritize that first (easy +2) + // prioritize that first (easy -2) for (int source : overloadedServersMostToLeastCells) { for (int target : underloadedServers) { Map cellsOnSource = cellGroupSizesPerServer.get(source); Map cellsOnTarget = cellGroupSizesPerServer.get(target); List singletonCellsOnSourceWeCanMoveToTarget = - cellsOnSource.keySet().stream().filter(cell -> cellsOnSource.get(cell) == 1) - .filter(cellsOnTarget::containsKey).collect(Collectors.toList()); + cellsOnSource.keySet().stream() + .filter(cell -> cellsOnSource.get(cell) == 1) + .filter(cellsOnTarget::containsKey) + .collect(Collectors.toList()); if (!singletonCellsOnSourceWeCanMoveToTarget.isEmpty()) { - Multimap cellsByRegionOnSource = - computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); short cellToMove = singletonCellsOnSourceWeCanMoveToTarget.get( ThreadLocalRandom.current().nextInt(singletonCellsOnSourceWeCanMoveToTarget.size())); - return getAction( - source, - pickRegionForCell(cellsByRegionOnSource, cellToMove), - target, - -1 - ); + return moveCell("restore -1", source, cellToMove, target, cellGroupSizesPerServer, cluster); } } } - int target = - underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); + // if there's a server w/ a singleton that we don't already have, accept it + // prioritize that next (0) + for (int source : overloadedServersMostToLeastCells) { + for (int target : underloadedServers) { + Map cellsOnSource = cellGroupSizesPerServer.get(source); + + List cellsOnSourcePresentOnTarget = + cellsOnSource.keySet() + .stream() + .filter(cell -> cellsOnSource.get(cell) == 1) + .collect(Collectors.toList()); + + if (!cellsOnSourcePresentOnTarget.isEmpty()) { + short cellToMove = cellsOnSourcePresentOnTarget.get(ThreadLocalRandom.current().nextInt(cellsOnSourcePresentOnTarget.size())); - // if there's a server w/ excess that has a singleton cell we don't have but only one instance, accept it - // (0, neutral) + return moveCell("restore 0", source, cellToMove, target, cellGroupSizesPerServer, cluster); + } + } + } + + // if there's a server w/ excess that has more than one instance of a cell that we already have, + // prioritize that next (0) for (int source : overloadedServersMostToLeastCells) { - Map cellCountsOnServer = cellGroupSizesPerServer.get(source); - short leastFrequentCell = pickLeastFrequentCell(cellCountsOnServer); - if (cellCountsOnServer.get(leastFrequentCell) == 1) { - return getAction( - source, - pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), leastFrequentCell), - target, - NO_REGION - ); + for (int target : underloadedServers) { + Map cellsOnSource = cellGroupSizesPerServer.get(source); + Map cellsOnTarget = cellGroupSizesPerServer.get(target); + + List cellsOnSourcePresentOnTarget = + cellsOnSource.keySet() + .stream() + .filter(cellsOnTarget::containsKey) + .collect(Collectors.toList()); + + if (!cellsOnSourcePresentOnTarget.isEmpty()) { + short cellToMove = cellsOnSourcePresentOnTarget.get(ThreadLocalRandom.current().nextInt(cellsOnSourcePresentOnTarget.size())); + + return moveCell("restore 0", source, cellToMove, target, cellGroupSizesPerServer, cluster); + } } } + int target = + underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); + // ok, we give up. just pick a random region from the least loaded cell of some instance and call it a day - // this will be (-1) but allows balancing to continue + // this will be (+1) but allows balancing to continue int source = overloadedServersMostToLeastCells.get( ThreadLocalRandom.current().nextInt(Math.min(overloadedServersMostToLeastCells.size(), 5))); short cellToMove = pickLeastFrequentCell(cellGroupSizesPerServer.get(source)); - Multimap cellsByRegionForSource = - computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); - return getAction(source, - pickRegionForCell(cellsByRegionForSource, cellToMove), target, - NO_REGION - ); + return moveCell("restore +1", source, cellToMove, target, cellGroupSizesPerServer, cluster); } private BalanceAction giveAwaySomeRegionToImprove( @@ -296,12 +446,7 @@ private BalanceAction giveAwaySomeRegionToImprove( return BalanceAction.NULL_ACTION; } - MoveRegionAction action = (MoveRegionAction) getAction( - sourceServer, - pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), - targetServer, - NO_REGION - ); + MoveRegionAction action = moveCell("give away", sourceServer, cell, targetServer, cellGroupSizesPerServer, cluster); if (LOG.isDebugEnabled() || DEBUG_MINOR) { Map cellsOnTarget = cellGroupSizesPerServer.get(targetServer); @@ -479,7 +624,7 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, if (cellsInCommon.size() > 1) { short commonCellToSwap = cellsInCommon.stream().filter(cell -> cell != sourceCell).findAny().get(); - SwapRegionsAction action = swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); + SwapRegionsAction action = swapCells("improve frequent 1", sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cellGroupSizesPerServer, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSourceCandidate.size(); int sourceNewTotal = cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); @@ -551,7 +696,7 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, .findFirst() .get(); - SwapRegionsAction action = swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); + SwapRegionsAction action = swapCells("improve frequent 2", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSource.size(); @@ -608,7 +753,69 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, return action; } - private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, BalancerClusterState cluster) { + private MoveRegionAction moveCell( + String originStep, + int fromServer, short fromCell, + int toServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + Map fromCounts = cellGroupSizesPerServer.get(fromServer); + Map toCounts = cellGroupSizesPerServer.get(toServer); + + String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + + fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "")) + .collect(Collectors.joining(", ", "{", "}")); + String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + + toCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + + int fromEmptiesFromCell = fromCounts.get(fromCell) == 1 ? -1 : 0; + int toGainsNewCell = toCounts.getOrDefault(fromCell, 0) == 0 ? 1 : 0; + + int change = fromEmptiesFromCell + toGainsNewCell; + + System.out.printf("[%20s]\t\t%2d\tmove %d:%d -> %d %s -> %s\n", + originStep, + change, + fromServer, fromCell, + toServer, fromCountsString, toCountsString + ); + + return (MoveRegionAction) getAction(fromServer, resolveCellToRegion(cluster, fromServer, fromCell), toServer, NO_REGION); + } + + private SwapRegionsAction swapCells( + String originStep, + int fromServer, short fromCell, + int toServer, short toCell, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + Map fromCounts = cellGroupSizesPerServer.get(fromServer); + Map toCounts = cellGroupSizesPerServer.get(toServer); + + String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + + fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + + toCounts.entrySet().stream().map(entry -> (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + + int fromEmptiesFromCell = fromCounts.get(fromCell) == 1 ? -1 : 0; + int fromGainsNewCell = fromCounts.getOrDefault(toCell, 0) == 0 ? 1 : 0; + int toEmptiesToCell = toCounts.get(toCell) == 1 ? -1 : 0; + int toGainsNewCell = toCounts.getOrDefault(fromCell, 0) == 0 ? 1 : 0; + + int change = fromEmptiesFromCell + fromGainsNewCell + toEmptiesToCell + toGainsNewCell; + + System.out.printf("[%20s]\t\t%2d\tswap %3d:%3d <-> %3d:%3d %s <-> %s\n", + originStep, + change, + fromServer, fromCell, + toServer, toCell, fromCountsString, toCountsString + ); + return (SwapRegionsAction) getAction( fromServer, resolveCellToRegion(cluster, fromServer, fromCell), @@ -623,24 +830,6 @@ private int resolveCellToRegion(BalancerClusterState cluster, int server, short return pickRegionForCell(cellsByRegion, cell); } - private SwapRegionsAction swap( - int receivingServer, - short cellToGiveToReceivingServer, - int offeringServer, - short cellToOfferFromReceivingServerToOrigin, - BalancerClusterState cluster - ) { - Multimap cellsByRegionForReceivingServer = - computeCellsByRegion(cluster.regionsPerServer[receivingServer], cluster.regions); - Multimap cellsByRegionForOfferingServer = - computeCellsByRegion(cluster.regionsPerServer[offeringServer], cluster.regions); - - return (SwapRegionsAction) getAction( - offeringServer, pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), - receivingServer, pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin) - ); - } - private int pickRegionForCell(Multimap cellsByRegionOnServer, short cellToMove) { return cellsByRegionOnServer.keySet().stream() .filter(region -> cellsByRegionOnServer.get(region).contains(cellToMove)) @@ -751,9 +940,20 @@ private Pair pickLeastFrequentCellOnMostLoadedServer( int targetCellsPerServer = Ints.checkedCast( (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); - int highestLoadedServer = IntStream.range(0, cluster.numServers).boxed() - .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) - .collect(Collectors.toList()).get(cluster.numServers - 1); + Optional highestLoadedServerMaybe = IntStream.range(0, cluster.numServers).boxed() + .filter(server -> cellGroupSizesPerServer.get(server).keySet().size() > targetCellsPerServer) + .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())); + + if (!highestLoadedServerMaybe.isPresent()) { + return Pair.newPair((short)-1, NO_SERVER); + } + + int sampleHighestLoadedServer = highestLoadedServerMaybe.get(); + int maxCellsOnAnyServer = cellGroupSizesPerServer.get(sampleHighestLoadedServer).keySet().size(); + List maxLoadedServers = IntStream.range(0, cluster.numServers).boxed() + .filter(server -> cellGroupSizesPerServer.get(server).keySet().size() == maxCellsOnAnyServer) + .collect(Collectors.toList()); + int highestLoadedServer = maxLoadedServers.get(ThreadLocalRandom.current().nextInt(maxLoadedServers.size())); Map cellCountsForHighestLoadedServer = cellGroupSizesPerServer.get(highestLoadedServer); int numCellsOnHighestLoadedServer = cellCountsForHighestLoadedServer.keySet().size(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java new file mode 100644 index 000000000000..d702b5823df4 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java @@ -0,0 +1,146 @@ +package org.apache.hadoop.hbase.master.balancer; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.OpenOption; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import com.google.common.math.Quantiles; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; +import org.apache.hbase.thirdparty.com.google.common.math.Stats; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.base.Charsets; +import org.apache.hbase.thirdparty.com.google.common.io.Resources; + +public class HubSpotScratchFile { + private static final Logger LOG = LoggerFactory.getLogger(HubSpotScratchFile.class); + + public static void main(String[] args) throws IOException { + BalancerClusterState original = loadCluster("cluster.json"); + BalancerClusterState state = loadCluster("cluster_partial.json"); + + HubSpotCellCostFunction func = + new HubSpotCellCostFunction(new Configuration()); + HubSpotCellBasedCandidateGenerator generator = new HubSpotCellBasedCandidateGenerator(); + + func.prepare(state); + double cost = func.cost(); + Set movedRegions = new HashSet<>(); + Set fromServers = new HashSet<>(); + Set toServers = new HashSet<>(); + Set repeatMoveRegions = new HashSet<>(); + + double lastCost = cost; + int printFrequency = 500; + int lastSnapshotAt = 10; + + for (int step = 0; step < 200_000; step++) { + if (step % printFrequency == 0) { + double costDelta = cost - lastCost; + lastCost = cost; + double costPerStep = costDelta / printFrequency; + + List size = HubSpotCellBasedCandidateGenerator.computeCellsPerRs(state); + Map quantiles = + Quantiles.scale(100).indexes(10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(size); + + System.out.printf("Step %d --> %.2f - %d regions moved (%d more than once), %d sources, %d targets. Moving %.2f per step, cumulative %.2f drop\t\t\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t\n", + step, cost, movedRegions.size(), repeatMoveRegions.size(), fromServers.size(), toServers.size(), costPerStep, costDelta, + quantiles.get(10), quantiles.get(20),quantiles.get(30),quantiles.get(40),quantiles.get(50),quantiles.get(60),quantiles.get(70),quantiles.get(80),quantiles.get(90),quantiles.get(100)); + + if (quantiles.get(100) < lastSnapshotAt) { + lastSnapshotAt = (int) Math.ceil(quantiles.get(100)); + writeStringToFile("/Users/eszabowexler/Downloads/cluster_partial.json", HubSpotCellCostFunction.OBJECT_MAPPER.toJson(state)); + writeStringToFile(String.format("/Users/eszabowexler/Downloads/hbase_instructions_%d.txt", lastSnapshotAt), generateShellCommands(original, state)); + } + } + BalanceAction action = generator.generate(state); + if (action instanceof SwapRegionsAction) { + SwapRegionsAction swapRegionsAction = (SwapRegionsAction) action; + + if (movedRegions.contains(swapRegionsAction.getFromRegion())) { + repeatMoveRegions.add(swapRegionsAction.getFromServer()); + } + if (movedRegions.contains(swapRegionsAction.getToRegion())) { + repeatMoveRegions.add(swapRegionsAction.getToRegion()); + } + + movedRegions.add(swapRegionsAction.getFromRegion()); + movedRegions.add(swapRegionsAction.getToRegion()); + fromServers.add(swapRegionsAction.getFromServer()); + toServers.add(swapRegionsAction.getToServer()); + } + + state.doAction(action); + func.postAction(action); + cost = func.cost(); + } + + LOG.info("{}", state); + } + + private static String generateShellCommands( + BalancerClusterState original, + BalancerClusterState state + ) { + int[][] newRegionsPerServer = state.regionsPerServer; + int[][] oldRegionsPerServer = original.regionsPerServer; + + return IntStream.range(0, original.numServers) + .boxed() + .flatMap(server -> { + int[] oldRegionsRaw = oldRegionsPerServer[server]; + int[] newRegionsRaw = newRegionsPerServer[server]; + + Set oldRegions = + Arrays.stream(oldRegionsRaw).mapToObj(oldRegion -> original.regions[oldRegion]) + .map(RegionInfo::getEncodedName) + .collect(Collectors.toSet()); + Set newRegions = + Arrays.stream(newRegionsRaw).mapToObj(newRegion -> state.regions[newRegion]) + .map(RegionInfo::getEncodedName) + .collect(Collectors.toSet()); + + Sets.SetView regionsMovedToThisServer = Sets.difference(newRegions, oldRegions); + ServerName serverName = state.servers[server]; + + return regionsMovedToThisServer.stream() + .map(encodedRegionName -> String.format("move '%s', '%s'", encodedRegionName, serverName.getServerName())); + }) + .collect(Collectors.joining("\n")); + } + + private static BalancerClusterState loadCluster(String filename) throws IOException { + System.out.printf("Loading %s\n", filename); + String file = Resources.readLines(new URL("file:///Users/eszabowexler/Downloads/" + filename), Charsets.UTF_8).stream() + .collect(Collectors.joining("\n")); + BalancerClusterState state = + HubSpotCellCostFunction.OBJECT_MAPPER.fromJson(file, BalancerClusterState.class); + System.out.printf("Loaded %s!\n", filename); + return state; + } + + // function to write string to file by absolute path + public static void writeStringToFile(String path, String content) { + try { + System.out.printf("Writing %s\n", path); + Files.write(Paths.get(path), content.getBytes()); + System.out.printf("Wrote %s!\n", path); + } catch (IOException e) { + e.printStackTrace(); + } + } +} From 221a8c48cc1ac4f797927eed0033e82ec0a9580e Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 26 Nov 2024 16:03:53 -0500 Subject: [PATCH 081/126] cost is actually how far we are from having as many cells as possible --- .../balancer/HubSpotCellCostFunction.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 27c95460894b..efc6e5f887d0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -203,6 +204,7 @@ static class Int2IntCounterMapAdapter implements JsonSerializer toCells(r.getStartKey(), r.getEndKey(), MAX_CELL_COUNT).forEach(cell -> regionCountByCell.addAndGet((int) cell, 1))); + this.bestCaseMaxCellsPerServer = balancedRegionsPerServer; + int numTimesCellRegionsFillAllServers = 0; + for (int cell = 0; cell < MAX_CELL_COUNT; cell++) { + int numRegionsForCell = regionCountByCell.get(cell); + numTimesCellRegionsFillAllServers += Ints.checkedCast((long) Math.floor((double) numRegionsForCell / numServers)); + } + + this.bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; + this.numRegionCellsOverassigned = calculateCurrentCellCost( numCells, @@ -450,7 +464,7 @@ static int calculateCurrentCellCost( } } - int costForThisServer = Math.max(cellsOnThisServer - bestCaseMaxCellsPerServer, 0); + int costForThisServer = Math.max(bestCaseMaxCellsPerServer - cellsOnThisServer, 0); if (LOG.isDebugEnabled()) { debugBuilder.append(server).append("=").append(costForThisServer).append(", "); } From 20cbb95aaf51452e3197fe375ca418f48c033162 Mon Sep 17 00:00:00 2001 From: Hernan Gelaf-Romer Date: Tue, 26 Nov 2024 16:13:53 -0500 Subject: [PATCH 082/126] add custom normalizer --- .../hbase/client/MutableRegionInfo.java | 36 +- .../org/apache/hadoop/hbase/ServerName.java | 11 +- .../org/apache/hadoop/hbase/TableName.java | 26 +- .../org/apache/hadoop/hbase/net/Address.java | 5 +- .../MetricsRegionServerSource.java | 6 +- .../MetricsRegionServerSourceImpl.java | 6 +- .../hadoop/hbase/ipc/NettyRpcServer.java | 5 +- .../master/balancer/BalancerClusterState.java | 149 +++-- .../HubSpotCellBasedCandidateGenerator.java | 534 ++++++++---------- .../balancer/HubSpotCellCostFunction.java | 170 +++--- .../master/balancer/RegionLocationFinder.java | 6 +- .../balancer/StochasticLoadBalancer.java | 19 +- .../HubspotCellAwareNormalizer.java | 78 +++ .../normalizer/RegionNormalizerFactory.java | 13 +- .../balancer/TestHubSpotCellCostFunction.java | 39 +- 15 files changed, 586 insertions(+), 517 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index 81e6d478b79d..779474c8e291 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -23,11 +23,12 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; + /** * An implementation of RegionInfo that adds mutable methods so can build a RegionInfo instance. * Package private. Use {@link RegionInfoBuilder} creating instances of {@link RegionInfo}s. @@ -55,17 +56,28 @@ class MutableRegionInfo implements RegionInfo { // zookeeper as of 0.90.0 HBase. And now in DisableTableProcedure, finally we will create bunch // of UnassignProcedures and at the last of the procedure we will set the region state to // CLOSED, and will not change the offLine flag. - @Expose private boolean offLine; - @Expose private boolean split; - @Expose private final long regionId; - @Expose private final int replicaId; - @Expose private final byte[] regionName; - @Expose private final byte[] startKey; - @Expose private final byte[] endKey; - @Expose private final int hashCode; - @Expose private final String encodedName; - @Expose private final byte[] encodedNameAsBytes; - @Expose private final TableName tableName; + @Expose + private boolean offLine; + @Expose + private boolean split; + @Expose + private final long regionId; + @Expose + private final int replicaId; + @Expose + private final byte[] regionName; + @Expose + private final byte[] startKey; + @Expose + private final byte[] endKey; + @Expose + private final int hashCode; + @Expose + private final String encodedName; + @Expose + private final byte[] encodedNameAsBytes; + @Expose + private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, final byte[] endKey, final long regionId, final int replicaId, boolean offLine, diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 9f32e64b9ff4..319b6f8474ea 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -26,13 +26,13 @@ import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.collect.Interner; import org.apache.hbase.thirdparty.com.google.common.collect.Interners; import org.apache.hbase.thirdparty.com.google.common.net.InetAddresses; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * Name of a particular incarnation of an HBase Server. A {@link ServerName} is used uniquely @@ -83,15 +83,18 @@ public class ServerName implements Comparable, Serializable { */ public static final String UNKNOWN_SERVERNAME = "#unknown#"; - @Expose private final String serverName; - @Expose private final long startCode; + @Expose + private final String serverName; + @Expose + private final long startCode; private transient Address address; /** * Cached versioned bytes of this ServerName instance. * @see #getVersionedBytes() */ - @Expose private byte[] bytes; + @Expose + private byte[] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList<>(0); /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index 0b077b724786..ad793209d5e1 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -24,10 +24,10 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * Immutable POJO class for representing a table name. Which is of the form: <table @@ -94,14 +94,22 @@ public static boolean isMetaTableName(final TableName tn) { */ public static final TableName OLD_META_TABLE_NAME = getADummyTableName(OLD_META_STR); - @Expose private final byte[] name; - @Expose private final String nameAsString; - @Expose private final byte[] namespace; - @Expose private final String namespaceAsString; - @Expose private final byte[] qualifier; - @Expose private final String qualifierAsString; - @Expose private final boolean systemTable; - @Expose private final int hashCode; + @Expose + private final byte[] name; + @Expose + private final String nameAsString; + @Expose + private final byte[] namespace; + @Expose + private final String namespaceAsString; + @Expose + private final byte[] qualifier; + @Expose + private final String qualifierAsString; + @Expose + private final boolean systemTable; + @Expose + private final int hashCode; /** * Check passed byte array, "tableName", is legal user-space table name. diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index 5b35bfbd0edb..9b9d74b21985 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -21,11 +21,11 @@ import java.util.Iterator; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.net.HostAndPort; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * An immutable type to hold a hostname and port combo, like an Endpoint or @@ -38,7 +38,8 @@ */ @InterfaceAudience.Public public class Address implements Comparable
{ - @Expose private final HostAndPort hostAndPort; + @Expose + private final HostAndPort hostAndPort; private Address(HostAndPort hostAndPort) { this.hostAndPort = hostAndPort; diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java index dca25ffde417..072b2e171858 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java @@ -524,10 +524,12 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo String ZEROCOPY_BYTES_READ_DESC = "The number of bytes read through HDFS zero copy"; String LOCAL_RACK_BYTES_READ = "localRackBytesRead"; - String LOCAL_RACK_BYTES_READ_DESC = "The number of bytes read from the same rack of the RegionServer, but not the local HDFS DataNode"; + String LOCAL_RACK_BYTES_READ_DESC = + "The number of bytes read from the same rack of the RegionServer, but not the local HDFS DataNode"; String REMOTE_RACK_BYTES_READ = "remoteRackBytesRead"; - String REMOTE_RACK_BYTES_READ_DESC = "The number of bytes read from a different rack from that of the RegionServer"; + String REMOTE_RACK_BYTES_READ_DESC = + "The number of bytes read from a different rack from that of the RegionServer"; String BLOCKED_REQUESTS_COUNT = "blockedRequestCount"; String BLOCKED_REQUESTS_COUNT_DESC = "The number of blocked requests because of memstore size is " diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java index 1526ca3f5a8c..e6402b847015 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java @@ -553,8 +553,10 @@ private MetricsRecordBuilder addGaugesToMetricsRecordBuilder(MetricsRecordBuilde PERCENT_FILES_LOCAL_SECONDARY_REGIONS_DESC), rsWrap.getPercentFileLocalSecondaryRegions()) .addGauge(Interns.info(TOTAL_BYTES_READ, TOTAL_BYTES_READ_DESC), rsWrap.getTotalBytesRead()) .addGauge(Interns.info(LOCAL_BYTES_READ, LOCAL_BYTES_READ_DESC), rsWrap.getLocalBytesRead()) - .addGauge(Interns.info(LOCAL_RACK_BYTES_READ, LOCAL_RACK_BYTES_READ_DESC), rsWrap.getLocalRackBytesRead()) - .addGauge(Interns.info(REMOTE_RACK_BYTES_READ, REMOTE_RACK_BYTES_READ_DESC), rsWrap.getRemoteRackBytesRead()) + .addGauge(Interns.info(LOCAL_RACK_BYTES_READ, LOCAL_RACK_BYTES_READ_DESC), + rsWrap.getLocalRackBytesRead()) + .addGauge(Interns.info(REMOTE_RACK_BYTES_READ, REMOTE_RACK_BYTES_READ_DESC), + rsWrap.getRemoteRackBytesRead()) .addGauge(Interns.info(SHORTCIRCUIT_BYTES_READ, SHORTCIRCUIT_BYTES_READ_DESC), rsWrap.getShortCircuitBytesRead()) .addGauge(Interns.info(ZEROCOPY_BYTES_READ, ZEROCOPY_BYTES_READ_DESC), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java index da970f9d2f6a..aa7b4dee32fc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; -import org.apache.hbase.thirdparty.io.netty.handler.ssl.util.LazyX509Certificate; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,6 +74,7 @@ import org.apache.hbase.thirdparty.io.netty.handler.ssl.OptionalSslHandler; import org.apache.hbase.thirdparty.io.netty.handler.ssl.SslContext; import org.apache.hbase.thirdparty.io.netty.handler.ssl.SslHandler; +import org.apache.hbase.thirdparty.io.netty.handler.ssl.util.LazyX509Certificate; import org.apache.hbase.thirdparty.io.netty.util.concurrent.GlobalEventExecutor; /** @@ -456,7 +456,8 @@ static void sslHandshakeCompleteHandler(NettyServerRpcConnection conn, SslHandle if (certificates != null && certificates.length > 0) { X509Certificate[] x509Certificates = new X509Certificate[certificates.length]; for (int i = 0; i < certificates.length; i++) { - // Hack to work around https://github.com/netty/netty/issues/13796, remove once HBase uses Netty 4.1.107.Final or later + // Hack to work around https://github.com/netty/netty/issues/13796, remove once HBase uses + // Netty 4.1.107.Final or later if (certificates[i] instanceof LazyX509Certificate) { Method method = LazyX509Certificate.class.getDeclaredMethod("unwrap"); method.setAccessible(true); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index de1cb5793017..2bd3b8f08932 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -34,11 +34,12 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; + /** * An efficient array based implementation similar to ClusterState for keeping the status of the * cluster in terms of region assignment and distribution. LoadBalancers, such as @@ -53,68 +54,112 @@ class BalancerClusterState { private static final Logger LOG = LoggerFactory.getLogger(BalancerClusterState.class); - @Expose ServerName[] servers; + @Expose + ServerName[] servers; // ServerName uniquely identifies a region server. multiple RS can run on the same host - @Expose String[] hosts; - @Expose String[] racks; - @Expose boolean multiServersPerHost = false; // whether or not any host has more than one server - - @Expose ArrayList tables; - @Expose RegionInfo[] regions; - @Expose Deque[] regionLoads; + @Expose + String[] hosts; + @Expose + String[] racks; + @Expose + boolean multiServersPerHost = false; // whether or not any host has more than one server + + @Expose + ArrayList tables; + @Expose + RegionInfo[] regions; + @Expose + Deque[] regionLoads; private RegionLocationFinder regionFinder; - @Expose int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality - - @Expose int[] serverIndexToHostIndex; // serverIndex -> host index - @Expose int[] serverIndexToRackIndex; // serverIndex -> rack index - - @Expose int[][] regionsPerServer; // serverIndex -> region list - @Expose int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list - @Expose int[][] regionsPerHost; // hostIndex -> list of regions - @Expose int[][] regionsPerRack; // rackIndex -> region list - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated + @Expose + int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality + + @Expose + int[] serverIndexToHostIndex; // serverIndex -> host index + @Expose + int[] serverIndexToRackIndex; // serverIndex -> rack index + + @Expose + int[][] regionsPerServer; // serverIndex -> region list + @Expose + int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list + @Expose + int[][] regionsPerHost; // hostIndex -> list of regions + @Expose + int[][] regionsPerRack; // rackIndex -> region list + @Expose + Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated // replicas by primary region index - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by + @Expose + Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by // primary region index - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by + @Expose + Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by // primary region index - @Expose int[][] serversPerHost; // hostIndex -> list of server indexes - @Expose int[][] serversPerRack; // rackIndex -> list of server indexes - @Expose int[] regionIndexToServerIndex; // regionIndex -> serverIndex - @Expose int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) - @Expose int[] regionIndexToTableIndex; // regionIndex -> tableIndex - @Expose int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions - @Expose int[] numRegionsPerTable; // tableIndex -> region count - @Expose int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS - @Expose int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary - @Expose boolean hasRegionReplicas = false; // whether there is regions with replicas - - @Expose Integer[] serverIndicesSortedByRegionCount; - @Expose Integer[] serverIndicesSortedByLocality; - - @Expose Map serversToIndex; - @Expose Map hostsToIndex; - @Expose Map racksToIndex; - @Expose Map tablesToIndex; - @Expose Map regionsToIndex; - @Expose float[] localityPerServer; - - @Expose int numServers; - @Expose int numHosts; - @Expose int numRacks; - @Expose int numTables; - @Expose int numRegions; - - @Expose int numMovedRegions = 0; // num moved regions from the initial configuration - @Expose Map> clusterState; + @Expose + int[][] serversPerHost; // hostIndex -> list of server indexes + @Expose + int[][] serversPerRack; // rackIndex -> list of server indexes + @Expose + int[] regionIndexToServerIndex; // regionIndex -> serverIndex + @Expose + int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) + @Expose + int[] regionIndexToTableIndex; // regionIndex -> tableIndex + @Expose + int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions + @Expose + int[] numRegionsPerTable; // tableIndex -> region count + @Expose + int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + @Expose + int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary + @Expose + boolean hasRegionReplicas = false; // whether there is regions with replicas + + @Expose + Integer[] serverIndicesSortedByRegionCount; + @Expose + Integer[] serverIndicesSortedByLocality; + + @Expose + Map serversToIndex; + @Expose + Map hostsToIndex; + @Expose + Map racksToIndex; + @Expose + Map tablesToIndex; + @Expose + Map regionsToIndex; + @Expose + float[] localityPerServer; + + @Expose + int numServers; + @Expose + int numHosts; + @Expose + int numRacks; + @Expose + int numTables; + @Expose + int numRegions; + + @Expose + int numMovedRegions = 0; // num moved regions from the initial configuration + @Expose + Map> clusterState; private final RackManager rackManager; // Maps region -> rackIndex -> locality of region on rack - @Expose private float[][] rackLocalities; + @Expose + private float[][] rackLocalities; // Maps localityType -> region -> [server|rack]Index with highest locality - @Expose private int[][] regionsToMostLocalEntities; + @Expose + private int[][] regionsToMostLocalEntities; static class DefaultRackManager extends RackManager { @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 8f3e5f575913..10492c8ccb52 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -35,12 +35,14 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; import org.apache.hbase.thirdparty.com.google.common.collect.Sets; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; -@InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { +@InterfaceAudience.Private +class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { private static final int NO_SERVER = -1; private static final int NO_REGION = -1; private static final boolean DEBUG_MAJOR = false; @@ -49,7 +51,8 @@ private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); - @Override BalanceAction generate(BalancerClusterState cluster) { + @Override + BalanceAction generate(BalancerClusterState cluster) { if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { return BalanceAction.NULL_ACTION; } @@ -62,7 +65,9 @@ int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; Arrays.stream(cluster.regions) - .flatMap(region -> HubSpotCellCostFunction.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT).stream()) + .flatMap(region -> HubSpotCellCostFunction + .toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT) + .stream()) .forEach(cellOnRegion -> cellCounts[cellOnRegion]++); double[] cellPercents = new double[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int i = 0; i < cellCounts.length; i++) { @@ -70,52 +75,55 @@ } List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + IntStream.range(0, cluster.regionsPerServer.length) + .mapToObj(serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])) + .collect(Collectors.toList()); return generateAction(cluster, cellCounts, cellGroupSizesPerServer); } - private BalanceAction generateAction( - BalancerClusterState cluster, - int[] cellCounts, - List> cellGroupSizesPerServer - ) { - int targetRegionsPerServer = Ints.checkedCast( - (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - List regionCounts = Arrays.stream(cluster.regionsPerServer).map(regions -> regions.length) - .collect(Collectors.toList()); + private BalanceAction generateAction(BalancerClusterState cluster, int[] cellCounts, + List> cellGroupSizesPerServer) { + int targetRegionsPerServer = + Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + List regionCounts = Arrays.stream(cluster.regionsPerServer) + .map(regions -> regions.length).collect(Collectors.toList()); - List> bigServers = - cellGroupSizesPerServer.stream().filter(e -> e.keySet().size() > 7) - .collect(Collectors.toList()); + List> bigServers = cellGroupSizesPerServer.stream() + .filter(e -> e.keySet().size() > 7).collect(Collectors.toList()); Map collective = new HashMap<>(); bigServers.forEach(e -> e.forEach((k, v) -> collective.merge(k, v, Integer::sum))); List underloadedServers = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1).boxed() - .collect(Collectors.toList()); + .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1) + .boxed().collect(Collectors.toList()); - // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within plus/minus 1 of the target + // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within + // plus/minus 1 of the target if (!underloadedServers.isEmpty()) { List serversThatCanLoseOneRegion = IntStream.range(0, cluster.numServers) .filter(server -> cluster.regionsPerServer[server].length >= targetRegionsPerServer).boxed() .collect(Collectors.toList()); - return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, cellGroupSizesPerServer, cluster); + return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, + cellGroupSizesPerServer, cluster); } // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps - Pair cellOnServer = pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); + Pair cellOnServer = + pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); if (cellOnServer.getSecond() != NO_SERVER) { return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); } - // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces - // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can either swap - // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that subsequent rounds + // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller + // disconnected pieces + // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can + // either swap + // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that + // subsequent rounds // will use step (1) to repair the imbalance. cellOnServer = pickLeastFrequentCellOnMostLoadedServer(cellGroupSizesPerServer, cellCounts, cluster); @@ -124,47 +132,36 @@ private BalanceAction generateAction( return BalanceAction.NULL_ACTION; } - BalanceAction swapAttempt = giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); + BalanceAction swapAttempt = + giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); if (swapAttempt != BalanceAction.NULL_ACTION) { return swapAttempt; } - return giveAwaySomeRegionToImprove( - cellOnServer, - cellGroupSizesPerServer, - cellCounts, - cluster - ); + return giveAwaySomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cellCounts, cluster); } private Pair pickSecondMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, - int[] cellCounts, - BalancerClusterState cluster - ) { - return IntStream.range(0, cluster.numServers) - .boxed() + List> cellGroupSizesPerServer, int[] cellCounts, + BalancerClusterState cluster) { + return IntStream.range(0, cluster.numServers).boxed() .filter(server -> cellGroupSizesPerServer.get(server).size() > 1) - .map(server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) - .sorted(Comparator.comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) - .findFirst() - .orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); + .map( + server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) + .sorted(Comparator + .comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) + .findFirst().orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); } private short get2ndMostFrequentCell(Map countOfCells) { short mostFrequent = pickMostFrequentCell(countOfCells); - return countOfCells.keySet().stream() - .filter(cell -> cell != mostFrequent) - .max(Comparator.comparing(countOfCells::get)) - .get(); + return countOfCells.keySet().stream().filter(cell -> cell != mostFrequent) + .max(Comparator.comparing(countOfCells::get)).get(); } - private BalanceAction giveAwayRegionViaSwap( - Pair cellOnServer, - List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { + private BalanceAction giveAwayRegionViaSwap(Pair cellOnServer, + List> cellGroupSizesPerServer, BalancerClusterState cluster) { short sourceCell = cellOnServer.getFirst(); int sourceServer = cellOnServer.getSecond(); @@ -172,11 +169,10 @@ private BalanceAction giveAwayRegionViaSwap( Set sourceCells = sourceCellCounts.keySet(); Optional otherServerWithSharedCellAndMostOfTheCellToGiveAway = - IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> server != sourceServer) + IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) .filter(server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell)) - .filter(server -> Sets.intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) + .filter(server -> Sets + .intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(sourceCell))); if (!otherServerWithSharedCellAndMostOfTheCellToGiveAway.isPresent()) { @@ -192,13 +188,11 @@ private BalanceAction giveAwayRegionViaSwap( return swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); } - private BalanceAction moveRegionFromOverloadedToUnderloaded( - List overloadedServers, - List underloadedServers, - List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { - List overloadedServersMostToLeastCells = overloadedServers.stream().sorted( + private BalanceAction moveRegionFromOverloadedToUnderloaded(List overloadedServers, + List underloadedServers, List> cellGroupSizesPerServer, + BalancerClusterState cluster) { + List overloadedServersMostToLeastCells = overloadedServers.stream() + .sorted( Comparator.comparing(server -> -1 * cellGroupSizesPerServer.get(server).keySet().size())) .collect(Collectors.toList()); // if there's a server w/ excess that has a single instance of a cell that we already have, @@ -218,12 +212,8 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded( short cellToMove = singletonCellsOnSourceWeCanMoveToTarget.get( ThreadLocalRandom.current().nextInt(singletonCellsOnSourceWeCanMoveToTarget.size())); - return getAction( - source, - pickRegionForCell(cellsByRegionOnSource, cellToMove), - target, - -1 - ); + return getAction(source, pickRegionForCell(cellsByRegionOnSource, cellToMove), target, + -1); } } } @@ -231,23 +221,22 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded( int target = underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); - // if there's a server w/ excess that has a singleton cell we don't have but only one instance, accept it + // if there's a server w/ excess that has a singleton cell we don't have but only one instance, + // accept it // (0, neutral) for (int source : overloadedServersMostToLeastCells) { Map cellCountsOnServer = cellGroupSizesPerServer.get(source); short leastFrequentCell = pickLeastFrequentCell(cellCountsOnServer); if (cellCountsOnServer.get(leastFrequentCell) == 1) { - return getAction( - source, - pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), leastFrequentCell), - target, - NO_REGION - ); + return getAction(source, + pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), + leastFrequentCell), + target, NO_REGION); } } - - // ok, we give up. just pick a random region from the least loaded cell of some instance and call it a day + // ok, we give up. just pick a random region from the least loaded cell of some instance and + // call it a day // this will be (-1) but allows balancing to continue int source = overloadedServersMostToLeastCells.get( ThreadLocalRandom.current().nextInt(Math.min(overloadedServersMostToLeastCells.size(), 5))); @@ -255,18 +244,13 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded( Multimap cellsByRegionForSource = computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); - return getAction(source, - pickRegionForCell(cellsByRegionForSource, cellToMove), target, - NO_REGION - ); + return getAction(source, pickRegionForCell(cellsByRegionForSource, cellToMove), target, + NO_REGION); } - private BalanceAction giveAwaySomeRegionToImprove( - Pair cellOnServer, - List> cellGroupSizesPerServer, - int[] cellCounts, - BalancerClusterState cluster - ) { + private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnServer, + List> cellGroupSizesPerServer, int[] cellCounts, + BalancerClusterState cluster) { short cell = cellOnServer.getFirst(); int sourceServer = cellOnServer.getSecond(); @@ -274,10 +258,8 @@ private BalanceAction giveAwaySomeRegionToImprove( Map cellCountsOnSource = cellGroupSizesPerServer.get(sourceServer); Set cellsOnSource = cellCountsOnSource.keySet(); - - Optional otherServerWithThisCell = pickOtherServerWithThisCellToGiveItTo( - cell, sourceServer, cellGroupSizesPerServer, cluster - ); + Optional otherServerWithThisCell = + pickOtherServerWithThisCellToGiveItTo(cell, sourceServer, cellGroupSizesPerServer, cluster); int targetServer = NO_SERVER; @@ -296,12 +278,10 @@ private BalanceAction giveAwaySomeRegionToImprove( return BalanceAction.NULL_ACTION; } - MoveRegionAction action = (MoveRegionAction) getAction( - sourceServer, - pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), - targetServer, - NO_REGION - ); + MoveRegionAction action = (MoveRegionAction) getAction(sourceServer, + pickRegionForCell( + computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), + targetServer, NO_REGION); if (LOG.isDebugEnabled() || DEBUG_MINOR) { Map cellsOnTarget = cellGroupSizesPerServer.get(targetServer); @@ -315,41 +295,33 @@ private BalanceAction giveAwaySomeRegionToImprove( boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", -// action.getFromServer(), -// action.getRegion(), -// action.getToServer(), -// cell, -// cellCountsOnSource.get(cell), -// cellsOnTarget.get(cell), -// descrOfQuality -// ); - LOG.debug("Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", - action.getFromServer(), - action.getRegion(), - action.getToServer(), - cell, - cellCountsOnSource.get(cell), - cellsOnTarget.get(cell), - descrOfQuality - ); + String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" + : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" + : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" + : "BAD"; + + // System.out.printf( + // "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is + // %s\n", + // action.getFromServer(), + // action.getRegion(), + // action.getToServer(), + // cell, + // cellCountsOnSource.get(cell), + // cellsOnTarget.get(cell), + // descrOfQuality + // ); + LOG.debug( + "Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", + action.getFromServer(), action.getRegion(), action.getToServer(), cell, + cellCountsOnSource.get(cell), cellsOnTarget.get(cell), descrOfQuality); } return action; } - private Optional pickOtherLowerLoadedServerToGiveCell( - int sourceServer, - List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { + private Optional pickOtherLowerLoadedServerToGiveCell(int sourceServer, + List> cellGroupSizesPerServer, BalancerClusterState cluster) { List serversByCellCountAsc = IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) @@ -377,23 +349,16 @@ private Optional pickOtherLowerLoadedServerToGiveCell( return Optional.of(serverToPick).filter(server -> server != NO_SERVER); } - private Optional pickOtherServerWithThisCellToGiveItTo( - short cell, - int sourceServer, - List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { - return IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> server != sourceServer) + private Optional pickOtherServerWithThisCellToGiveItTo(short cell, int sourceServer, + List> cellGroupSizesPerServer, BalancerClusterState cluster) { + return IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) - .filter(server -> cluster.regionsPerServer[server].length <= Math.ceil((double) cluster.numRegions / cluster.numServers)) + .filter(server -> cluster.regionsPerServer[server].length + <= Math.ceil((double) cluster.numRegions / cluster.numServers)) .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cell))); } - private short pickLeastFrequentCell( - Map cellCounts - ) { + private short pickLeastFrequentCell(Map cellCounts) { short cellToPick = -1; int lowestCountSoFar = Integer.MAX_VALUE; double reservoirRandom = -1; @@ -416,9 +381,7 @@ private short pickLeastFrequentCell( return cellToPick; } - private short pickMostFrequentCell( - Map cellCounts - ) { + private short pickMostFrequentCell(Map cellCounts) { short cellToPick = -1; int highestCountSoFar = Integer.MIN_VALUE; double reservoirRandom = -1; @@ -458,12 +421,14 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, } Set sourceCandidateSet = new HashSet<>(); - for (int sourceServerCandidate = 0; sourceServerCandidate < cellGroupSizesPerServer.size(); sourceServerCandidate++) { + for (int sourceServerCandidate = 0; sourceServerCandidate + < cellGroupSizesPerServer.size(); sourceServerCandidate++) { if (sourceServerCandidate == targetServer) { continue; } - Map cellsOnSourceCandidate = cellGroupSizesPerServer.get(sourceServerCandidate); + Map cellsOnSourceCandidate = + cellGroupSizesPerServer.get(sourceServerCandidate); // if that server is perfectly isolated, don't allow that to be broken even to fix another if (cellsOnSourceCandidate.keySet().size() == 1) { @@ -479,57 +444,54 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, if (cellsInCommon.size() > 1) { short commonCellToSwap = cellsInCommon.stream().filter(cell -> cell != sourceCell).findAny().get(); - SwapRegionsAction action = swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); + SwapRegionsAction action = + swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSourceCandidate.size(); - int sourceNewTotal = cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); + int sourceNewTotal = + cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); + int targetNewTotal = cellCountsOnTargetServer.size() + - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " -// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", -// action.getFromServer(), -// action.getFromRegion(), -// action.getToServer(), -// action.getToRegion(), -// commonCellToSwap, -// cellCountsOnTargetServer.get(commonCellToSwap), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellsOnSourceCandidate.get(sourceCell), -// commonCellToSwap, -// cellsOnSourceCandidate.get(commonCellToSwap), -// descrOfQuality -// ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" + : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) + ? "GOOD" + : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" + : "BAD"; + + // System.out.printf( + // "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), + // " + // + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + // action.getFromServer(), + // action.getFromRegion(), + // action.getToServer(), + // action.getToRegion(), + // commonCellToSwap, + // cellCountsOnTargetServer.get(commonCellToSwap), + // sourceCell, + // cellCountsOnTargetServer.get(sourceCell), + // sourceCell, + // cellsOnSourceCandidate.get(sourceCell), + // commonCellToSwap, + // cellsOnSourceCandidate.get(commonCellToSwap), + // descrOfQuality + // ); + LOG.debug( + "Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - commonCellToSwap, - cellCountsOnTargetServer.get(commonCellToSwap), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSourceCandidate.get(sourceCell), - commonCellToSwap, - cellsOnSourceCandidate.get(commonCellToSwap), - descrOfQuality - ); + action.getFromServer(), action.getFromRegion(), action.getToServer(), + action.getToRegion(), commonCellToSwap, + cellCountsOnTargetServer.get(commonCellToSwap), sourceCell, + cellCountsOnTargetServer.get(sourceCell), sourceCell, + cellsOnSourceCandidate.get(sourceCell), commonCellToSwap, + cellsOnSourceCandidate.get(commonCellToSwap), descrOfQuality); } return action; } @@ -545,76 +507,63 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, int sourceServer = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); Map cellsOnSource = cellGroupSizesPerServer.get(sourceServer); - short targetCell = cellsOnTargetServer.stream() - .filter(cell -> cell != sourceCell) - .sorted(Comparator.comparing(cellCountsOnTargetServer::get)) - .findFirst() - .get(); + short targetCell = cellsOnTargetServer.stream().filter(cell -> cell != sourceCell) + .sorted(Comparator.comparing(cellCountsOnTargetServer::get)).findFirst().get(); - SwapRegionsAction action = swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); + SwapRegionsAction action = + swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSource.size(); int sourceNewTotal = cellsOnSource.size() - (cellsOnSource.get(sourceCell) == 1 ? 1 : 0); int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); + int targetNewTotal = + cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " -// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", -// action.getFromServer(), -// action.getFromRegion(), -// action.getToServer(), -// action.getToRegion(), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellsOnSource.get(sourceCell), -// sourceCell, -// cellsOnSource.get(sourceCell), -// descrOfQuality -// ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" + : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" + : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" + : "BAD"; + + // System.out.printf( + // "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " + // + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", + // action.getFromServer(), + // action.getFromRegion(), + // action.getToServer(), + // action.getToRegion(), + // sourceCell, + // cellCountsOnTargetServer.get(sourceCell), + // sourceCell, + // cellCountsOnTargetServer.get(sourceCell), + // sourceCell, + // cellsOnSource.get(sourceCell), + // sourceCell, + // cellsOnSource.get(sourceCell), + // descrOfQuality + // ); + LOG.debug( + "Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - descrOfQuality - ); + action.getFromServer(), action.getFromRegion(), action.getToServer(), action.getToRegion(), + sourceCell, cellCountsOnTargetServer.get(sourceCell), sourceCell, + cellCountsOnTargetServer.get(sourceCell), sourceCell, cellsOnSource.get(sourceCell), + sourceCell, cellsOnSource.get(sourceCell), descrOfQuality); } return action; } - private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, BalancerClusterState cluster) { - return (SwapRegionsAction) getAction( - fromServer, - resolveCellToRegion(cluster, fromServer, fromCell), - toServer, - resolveCellToRegion(cluster, toServer, toCell) - ); + private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, + BalancerClusterState cluster) { + return (SwapRegionsAction) getAction(fromServer, + resolveCellToRegion(cluster, fromServer, fromCell), toServer, + resolveCellToRegion(cluster, toServer, toCell)); } private int resolveCellToRegion(BalancerClusterState cluster, int server, short cell) { @@ -623,22 +572,18 @@ private int resolveCellToRegion(BalancerClusterState cluster, int server, short return pickRegionForCell(cellsByRegion, cell); } - private SwapRegionsAction swap( - int receivingServer, - short cellToGiveToReceivingServer, - int offeringServer, - short cellToOfferFromReceivingServerToOrigin, - BalancerClusterState cluster - ) { + private SwapRegionsAction swap(int receivingServer, short cellToGiveToReceivingServer, + int offeringServer, short cellToOfferFromReceivingServerToOrigin, + BalancerClusterState cluster) { Multimap cellsByRegionForReceivingServer = computeCellsByRegion(cluster.regionsPerServer[receivingServer], cluster.regions); Multimap cellsByRegionForOfferingServer = computeCellsByRegion(cluster.regionsPerServer[offeringServer], cluster.regions); - return (SwapRegionsAction) getAction( - offeringServer, pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), - receivingServer, pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin) - ); + return (SwapRegionsAction) getAction(offeringServer, + pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), + receivingServer, + pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin)); } private int pickRegionForCell(Multimap cellsByRegionOnServer, short cellToMove) { @@ -650,14 +595,16 @@ private int pickRegionForCell(Multimap cellsByRegionOnServer, sh static List computeCellsPerRs(BalancerClusterState cluster) { List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + IntStream.range(0, cluster.regionsPerServer.length) + .mapToObj(serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])) + .collect(Collectors.toList()); return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); } private Pair pickMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { + List> cellGroupSizesPerServer, int[] cellCounts, + BalancerClusterState cluster) { cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; @@ -672,26 +619,26 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( int[] regionsForServer = regionsPerServer[serverIndex]; Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); - Set cellsOnThisServerAndOthers = - cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) - .collect(Collectors.toSet()); + Set cellsOnThisServerAndOthers = cellsOnServer.keySet().stream() + .filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]).collect(Collectors.toSet()); - if (cellsOnServer.keySet().size() <= targetCellsPerServer - // if we have a small cell where the entire cell is local, we MUST have at least 2 cells on this server to have - // an overall region balance, so allow us to go over the target by 1 cell - || cellsOnThisServerAndOthers.size() == 1) { + if ( + cellsOnServer.keySet().size() <= targetCellsPerServer + // if we have a small cell where the entire cell is local, we MUST have at least 2 cells + // on this server to have + // an overall region balance, so allow us to go over the target by 1 cell + || cellsOnThisServerAndOthers.size() == 1 + ) { continue; } - List> cellsByFrequencyAsc = - cellsOnServer.entrySet().stream().sorted(Map.Entry.comparingByValue()) - .collect(Collectors.toList()); + List> cellsByFrequencyAsc = cellsOnServer.entrySet().stream() + .sorted(Map.Entry.comparingByValue()).collect(Collectors.toList()); if (cellsByFrequencyAsc.isEmpty()) { continue; } - int probe = cellsByFrequencyAsc.size() - 1; short mostFrequentCellTemp = -1; int mostFrequentCellCountTemp = -1; @@ -701,7 +648,7 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( mostFrequentCellTemp = entry.getKey(); mostFrequentCellCountTemp = entry.getValue(); probe--; - } while(mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); + } while (mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); final short mostFrequentCell = mostFrequentCellTemp; final int mostFrequentCellCount = mostFrequentCellCountTemp; @@ -714,7 +661,8 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( long numServersWithMostFrequentCellNotSaturated = cellGroupSizesPerServer.stream().filter(cellMap -> cellMap.containsKey(mostFrequentCell)) .filter(cellMap -> cellMap.keySet().size() > 1).count(); - // if we're down to only one server unsaturated with the most frequent cell, there are no good swaps + // if we're down to only one server unsaturated with the most frequent cell, there are no good + // swaps if (numServersWithMostFrequentCellNotSaturated == 1) { continue; } @@ -744,10 +692,8 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( } private Pair pickLeastFrequentCellOnMostLoadedServer( - List> cellGroupSizesPerServer, - int[] cellCounts, - BalancerClusterState cluster - ) { + List> cellGroupSizesPerServer, int[] cellCounts, + BalancerClusterState cluster) { int targetCellsPerServer = Ints.checkedCast( (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); @@ -755,14 +701,16 @@ private Pair pickLeastFrequentCellOnMostLoadedServer( .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) .collect(Collectors.toList()).get(cluster.numServers - 1); - Map cellCountsForHighestLoadedServer = cellGroupSizesPerServer.get(highestLoadedServer); + Map cellCountsForHighestLoadedServer = + cellGroupSizesPerServer.get(highestLoadedServer); int numCellsOnHighestLoadedServer = cellCountsForHighestLoadedServer.keySet().size(); if (numCellsOnHighestLoadedServer <= targetCellsPerServer + 1) { return Pair.newPair((short) -1, -1); } - return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), highestLoadedServer); + return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), + highestLoadedServer); } private static Map computeCellGroupSizes(BalancerClusterState cluster, @@ -792,16 +740,16 @@ private static Map computeCellGroupSizes(BalancerClusterState cl byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - short startCellId = (startKey == null || startKey.length == 0) ? - 0 : - (startKey.length >= 2 ? - Bytes.toShort(startKey, 0, 2) : - Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : - (endKey.length >= 2 ? - Bytes.toShort(endKey, 0, 2) : - Bytes.toShort(new byte[] { -1, endKey[0] })); + short startCellId = (startKey == null || startKey.length == 0) + ? 0 + : (startKey.length >= 2 + ? Bytes.toShort(startKey, 0, 2) + : Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) + ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) + : (endKey.length >= 2 + ? Bytes.toShort(endKey, 0, 2) + : Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; @@ -845,16 +793,16 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - short startCellId = (startKey == null || startKey.length == 0) ? - 0 : - (startKey.length >= 2 ? - Bytes.toShort(startKey, 0, 2) : - Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : - (endKey.length >= 2 ? - Bytes.toShort(endKey, 0, 2) : - Bytes.toShort(new byte[] { -1, endKey[0] })); + short startCellId = (startKey == null || startKey.length == 0) + ? 0 + : (startKey.length >= 2 + ? Bytes.toShort(startKey, 0, 2) + : Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) + ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) + : (endKey.length >= 2 + ? Bytes.toShort(endKey, 0, 2) + : Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 27c95460894b..8f7593019f1c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -36,6 +36,7 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; @@ -67,8 +68,10 @@ public class HubSpotCellCostFunction extends CostFunction { private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - static class Int2IntCounterMapAdapter implements JsonSerializer, JsonDeserializer { - @Override public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, + static class Int2IntCounterMapAdapter + implements JsonSerializer, JsonDeserializer { + @Override + public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, JsonSerializationContext context) { JsonObject obj = new JsonObject(); @@ -99,7 +102,8 @@ static class Int2IntCounterMapAdapter implements JsonSerializer { - JsonObject obj = json.getAsJsonObject(); + static final Gson OBJECT_MAPPER = + new GsonBuilder().excludeFieldsWithoutExposeAnnotation().enableComplexMapKeySerialization() + .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) + .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { + JsonObject obj = json.getAsJsonObject(); - boolean split = obj.get("split").getAsBoolean(); - long regionId = obj.get("regionId").getAsLong(); - int replicaId = obj.get("replicaId").getAsInt(); - JsonObject tableName = obj.get("tableName").getAsJsonObject(); - JsonArray startKey = obj.get("startKey").getAsJsonArray(); - JsonArray endKey = obj.get("endKey").getAsJsonArray(); + boolean split = obj.get("split").getAsBoolean(); + long regionId = obj.get("regionId").getAsLong(); + int replicaId = obj.get("replicaId").getAsInt(); + JsonObject tableName = obj.get("tableName").getAsJsonObject(); + JsonArray startKey = obj.get("startKey").getAsJsonArray(); + JsonArray endKey = obj.get("endKey").getAsJsonArray(); - byte[] startKeyBytes = new byte[startKey.size()]; - byte[] endKeyBytes = new byte[endKey.size()]; + byte[] startKeyBytes = new byte[startKey.size()]; + byte[] endKeyBytes = new byte[endKey.size()]; - for (int i = 0; i < startKey.size(); i++) { - startKeyBytes[i] = startKey.get(i).getAsByte(); - } - for (int i = 0; i < endKey.size(); i++) { - endKeyBytes[i] = endKey.get(i).getAsByte(); - } + for (int i = 0; i < startKey.size(); i++) { + startKeyBytes[i] = startKey.get(i).getAsByte(); + } + for (int i = 0; i < endKey.size(); i++) { + endKeyBytes[i] = endKey.get(i).getAsByte(); + } - TableName tb = TableName.valueOf( - tableName.get("namespaceAsString").getAsString(), - tableName.get("qualifierAsString").getAsString() - ); + TableName tb = TableName.valueOf(tableName.get("namespaceAsString").getAsString(), + tableName.get("qualifierAsString").getAsString()); - RegionInfo result = - RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) + RegionInfo result = RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) .setReplicaId(replicaId).setStartKey(startKeyBytes).setEndKey(endKeyBytes).build(); - return result; - }) - .addDeserializationExclusionStrategy(new ExclusionStrategy() { - @Override public boolean shouldSkipField(FieldAttributes f) { - return f.getName().equals("serversToIndex") - || f.getName().equals("regionsToIndex") - || f.getName().equals("clusterState") - ; - } + return result; + }).addDeserializationExclusionStrategy(new ExclusionStrategy() { + @Override + public boolean shouldSkipField(FieldAttributes f) { + return f.getName().equals("serversToIndex") || f.getName().equals("regionsToIndex") + || f.getName().equals("clusterState"); + } - @Override public boolean shouldSkipClass(Class clazz) { - return false; - } - }) - .create(); + @Override + public boolean shouldSkipClass(Class clazz) { + return false; + } + }).create(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; @@ -219,10 +217,9 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - if (LOG.isTraceEnabled() - && cluster.tables.contains("objects-3") - && cluster.regions != null - && cluster.regions.length > 0 + if ( + LOG.isTraceEnabled() && cluster.tables.contains("objects-3") && cluster.regions != null + && cluster.regions.length > 0 ) { try { LOG.trace("{} cluster state:\n{}", cluster.tables, OBJECT_MAPPER.toJson(cluster)); @@ -234,43 +231,38 @@ void prepare(BalancerClusterState cluster) { this.serverHasCell = new boolean[numServers][numCells]; this.bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); this.numRegionCellsOverassigned = - calculateCurrentCellCost( - numCells, - numServers, - bestCaseMaxCellsPerServer, - regions, regionIndexToServerIndex, - serverHasCell, - super.cluster::getRegionSizeMB - ); - - if (regions.length > 0 - && regions[0].getTable().getNamespaceAsString().equals("default") - && LOG.isTraceEnabled() + calculateCurrentCellCost(numCells, numServers, bestCaseMaxCellsPerServer, regions, + regionIndexToServerIndex, serverHasCell, super.cluster::getRegionSizeMB); + + if ( + regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") + && LOG.isTraceEnabled() ) { - LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), snapshotState()); + LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), + snapshotState()); } } - @Override boolean isNeeded() { + @Override + boolean isNeeded() { return cluster.tables.stream().anyMatch(name -> name.contains("objects-3")); } - @Override protected void regionMoved(int region, int oldServer, int newServer) { + @Override + protected void regionMoved(int region, int oldServer, int newServer) { RegionInfo movingRegion = regions[region]; if (!movingRegion.getTable().getNamespaceAsString().equals("default")) { return; } - Set cellsOnRegion = toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + Set cellsOnRegion = + toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); if (LOG.isDebugEnabled()) { - LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", - region, - Bytes.toHex(movingRegion.getStartKey()), - Bytes.toHex(movingRegion.getEndKey()), - cellsOnRegion - ); + LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", region, + Bytes.toHex(movingRegion.getStartKey()), Bytes.toHex(movingRegion.getEndKey()), + cellsOnRegion); } Map numRegionsForCellOnOldServer = computeCellFrequencyForServer(oldServer); @@ -282,13 +274,8 @@ void prepare(BalancerClusterState cluster) { if (LOG.isDebugEnabled()) { LOG.debug( "Old server {} [{}] has cell frequency of {}.\n\nNew server {} [{}] has cell frequency of {}.", - oldServer, - currentCellCountOldServer, - numRegionsForCellOnOldServer, - newServer, - currentCellCountNewServer, - numRegionsForCellOnNewServer - ); + oldServer, currentCellCountOldServer, numRegionsForCellOnOldServer, newServer, + currentCellCountNewServer, numRegionsForCellOnNewServer); } int changeInOverassignedRegionCells = 0; @@ -313,7 +300,8 @@ void prepare(BalancerClusterState cluster) { } if (LOG.isDebugEnabled()) { - LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, changeInOverassignedRegionCells); + LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, + changeInOverassignedRegionCells); } numRegionCellsOverassigned += changeInOverassignedRegionCells; @@ -328,10 +316,7 @@ private Map computeCellFrequencyForServer(int server) { cellsInRegion.forEach(cell -> regionsByCell.put(cell, regionIndex)); } - return regionsByCell.build() - .asMap() - .entrySet() - .stream() + return regionsByCell.build().asMap().entrySet().stream() .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().size())); } @@ -391,15 +376,9 @@ protected double cost() { return numRegionCellsOverassigned; } - static int calculateCurrentCellCost( - short numCells, - int numServers, - int bestCaseMaxCellsPerServer, - RegionInfo[] regions, - int[] regionLocations, - boolean[][] serverHasCell, - Function getRegionSizeMbFunc - ) { + static int calculateCurrentCellCost(short numCells, int numServers, int bestCaseMaxCellsPerServer, + RegionInfo[] regions, int[] regionLocations, boolean[][] serverHasCell, + Function getRegionSizeMbFunc) { Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); @@ -465,12 +444,8 @@ static int calculateCurrentCellCost( return cost; } - private static void setCellsForServer( - boolean[] serverHasCell, - byte[] startKey, - byte[] endKey, - short numCells - ) { + private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, byte[] endKey, + short numCells) { short startCellId = (startKey == null || startKey.length == 0) ? 0 : (startKey.length >= 2 @@ -502,7 +477,8 @@ private static void setCellsForServer( } static boolean isStopExclusive(byte[] endKey) { - return endKey != null && endKey.length == 2 || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); + return endKey != null && endKey.length == 2 + || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); } static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index 85b15599e580..6d3857944f7c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -250,10 +250,12 @@ protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) try { TableDescriptor tableDescriptor = getTableDescriptor(region.getTable()); if (tableDescriptor != null) { - LOG.debug("Region {} is located on {}", regionNameAsString, tableDescriptor.getTableName().getNameAsString()); + LOG.debug("Region {} is located on {}", regionNameAsString, + tableDescriptor.getTableName().getNameAsString()); HDFSBlocksDistribution blocksDistribution = HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region); - LOG.debug("Top hosts for region {}: {}", regionNameAsString, blocksDistribution.getTopHosts()); + LOG.debug("Top hosts for region {}: {}", regionNameAsString, + blocksDistribution.getTopHosts()); return blocksDistribution; } } catch (IOException ioe) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index c7f872fc0844..3dae3fe0bd6e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -234,7 +234,8 @@ protected List createCandidateGenerators() { candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), new RegionReplicaRackCandidateGenerator()); - candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); + candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), + new HubSpotCellBasedCandidateGenerator()); return candidateGenerators; } @@ -564,7 +565,8 @@ protected List balanceTable(TableName tableName, LOG.info( "[{}] Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "functionCost={} computedMaxSteps={}", - tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), computedMaxSteps); + tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), + computedMaxSteps); final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. @@ -582,9 +584,9 @@ protected List balanceTable(TableName tableName, newCost = computeCost(cluster, currentCost); - if(LOG.isTraceEnabled()) { - LOG.trace("S[{}]: {} -> {} via {} -- {}", - step, currentCost, newCost, action, totalCostsPerFunc()); + if (LOG.isTraceEnabled()) { + LOG.trace("S[{}]: {} -> {} via {} -- {}", step, currentCost, newCost, action, + totalCostsPerFunc()); } // Should this be kept? @@ -618,15 +620,16 @@ protected List balanceTable(TableName tableName, + " to try {} different iterations. Found a solution that moves " + "{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. funtionCost={}", - tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), initCost / sumMultiplier, - currentCost / sumMultiplier, functionCost()); + tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), + initCost / sumMultiplier, currentCost / sumMultiplier, functionCost()); sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); return plans; } LOG.info( "[{}] Could not find a better moving plan. Tried {} different configurations in " + "{} ms, and did not find anything with an imbalance score less than {}", - tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, initCost / sumMultiplier); + tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, + initCost / sumMultiplier); return null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java new file mode 100644 index 000000000000..e73b979356d8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.normalizer; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class HubspotCellAwareNormalizer extends SimpleRegionNormalizer { + private static final Logger LOG = LoggerFactory.getLogger(HubspotCellAwareNormalizer.class); + + @Override + public List computePlansForTable(TableDescriptor tableDescriptor) { + List allPlans = super.computePlansForTable(tableDescriptor); + List filteredPlans = new ArrayList<>(allPlans.size()); + + for (NormalizationPlan plan : allPlans) { + boolean shouldInclude = shouldIncludePlan(plan); + if (shouldInclude) { + filteredPlans.add(plan); + } else { + LOG.info("Skipping plan: {}", plan); + } + } + + return filteredPlans; + } + + private static boolean shouldIncludePlan(NormalizationPlan plan) { + switch (plan.getType()) { + case MERGE: + return shouldIncludeMergePlan((MergeNormalizationPlan) plan); + case NONE: + case SPLIT: + return true; + default: + throw new RuntimeException("Unknown plan type: " + plan.getType()); + } + } + + private static boolean shouldIncludeMergePlan(MergeNormalizationPlan plan) { + List targets = plan.getNormalizationTargets(); + + if (targets.size() <= 1) { + return true; + } + + byte[] endKey = targets.get(0).getRegionInfo().getEndKey(); + short cell = Bytes.toShort(endKey); + + for (int i = 1; i < targets.size(); ++i) { + endKey = targets.get(i).getRegionInfo().getEndKey(); + if (cell != Bytes.toShort(endKey)) { + return false; + } + } + + return true; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index f97622b40631..71fe20be79c6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -18,12 +18,10 @@ package org.apache.hadoop.hbase.master.normalizer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -58,9 +56,12 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio */ private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer - Class balancerKlass = - conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - RegionNormalizer.class); - return ReflectionUtils.newInstance(balancerKlass, conf); + // Class balancerKlass = + // conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + // RegionNormalizer.class); + // return ReflectionUtils.newInstance(balancerKlass, conf); + + // HACK + return new HubspotCellAwareNormalizer(); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index fa9f358883ec..cbbf33edadba 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -105,20 +105,13 @@ public void testCellCountBothEndsNull() { @Test public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost - ((short) 4, - 4, - 1, - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, - new int[] { 0 , 1 , 2 , 3 }, - new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, - ALL_REGIONS_SIZE_1_MB - ); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, 1, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[] { 0, 1, 2, 3 }, + new boolean[][] { { false, false, false, false }, { false, false, false, false }, + { false, false, false, false }, { false, false, false, false } }, + ALL_REGIONS_SIZE_1_MB); assertEquals(0, cost); } @@ -126,18 +119,12 @@ public void testCostBalanced() { @Test public void testCostImbalanced() { // 4 cells, 4 servers, imbalanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost( - (short) 4, - 4, - 1, - new RegionInfo[] { - buildRegionInfo(null, (short) 1), - buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), - buildRegionInfo((short) 3, null) - }, - new int[] { 0 , 0 , 0 , 0 }, - new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, + int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, 1, + new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, + new int[] { 0, 0, 0, 0 }, + new boolean[][] { { false, false, false, false }, { false, false, false, false }, + { false, false, false, false }, { false, false, false, false } }, ALL_REGIONS_SIZE_1_MB); assertTrue(cost > 0); } From 9eedc866a1df299d6cb942f2be56570c941c3ab7 Mon Sep 17 00:00:00 2001 From: Hernan Gelaf-Romer Date: Tue, 26 Nov 2024 16:16:11 -0500 Subject: [PATCH 083/126] Revert "add custom normalizer" This reverts commit 20cbb95aaf51452e3197fe375ca418f48c033162. --- .../hbase/client/MutableRegionInfo.java | 36 +- .../org/apache/hadoop/hbase/ServerName.java | 11 +- .../org/apache/hadoop/hbase/TableName.java | 26 +- .../org/apache/hadoop/hbase/net/Address.java | 5 +- .../MetricsRegionServerSource.java | 6 +- .../MetricsRegionServerSourceImpl.java | 6 +- .../hadoop/hbase/ipc/NettyRpcServer.java | 5 +- .../master/balancer/BalancerClusterState.java | 149 ++--- .../HubSpotCellBasedCandidateGenerator.java | 534 ++++++++++-------- .../balancer/HubSpotCellCostFunction.java | 170 +++--- .../master/balancer/RegionLocationFinder.java | 6 +- .../balancer/StochasticLoadBalancer.java | 19 +- .../HubspotCellAwareNormalizer.java | 78 --- .../normalizer/RegionNormalizerFactory.java | 13 +- .../balancer/TestHubSpotCellCostFunction.java | 39 +- 15 files changed, 517 insertions(+), 586 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index 779474c8e291..81e6d478b79d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -23,12 +23,11 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; - /** * An implementation of RegionInfo that adds mutable methods so can build a RegionInfo instance. * Package private. Use {@link RegionInfoBuilder} creating instances of {@link RegionInfo}s. @@ -56,28 +55,17 @@ class MutableRegionInfo implements RegionInfo { // zookeeper as of 0.90.0 HBase. And now in DisableTableProcedure, finally we will create bunch // of UnassignProcedures and at the last of the procedure we will set the region state to // CLOSED, and will not change the offLine flag. - @Expose - private boolean offLine; - @Expose - private boolean split; - @Expose - private final long regionId; - @Expose - private final int replicaId; - @Expose - private final byte[] regionName; - @Expose - private final byte[] startKey; - @Expose - private final byte[] endKey; - @Expose - private final int hashCode; - @Expose - private final String encodedName; - @Expose - private final byte[] encodedNameAsBytes; - @Expose - private final TableName tableName; + @Expose private boolean offLine; + @Expose private boolean split; + @Expose private final long regionId; + @Expose private final int replicaId; + @Expose private final byte[] regionName; + @Expose private final byte[] startKey; + @Expose private final byte[] endKey; + @Expose private final int hashCode; + @Expose private final String encodedName; + @Expose private final byte[] encodedNameAsBytes; + @Expose private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, final byte[] endKey, final long regionId, final int replicaId, boolean offLine, diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 319b6f8474ea..9f32e64b9ff4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -26,13 +26,13 @@ import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.collect.Interner; import org.apache.hbase.thirdparty.com.google.common.collect.Interners; import org.apache.hbase.thirdparty.com.google.common.net.InetAddresses; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * Name of a particular incarnation of an HBase Server. A {@link ServerName} is used uniquely @@ -83,18 +83,15 @@ public class ServerName implements Comparable, Serializable { */ public static final String UNKNOWN_SERVERNAME = "#unknown#"; - @Expose - private final String serverName; - @Expose - private final long startCode; + @Expose private final String serverName; + @Expose private final long startCode; private transient Address address; /** * Cached versioned bytes of this ServerName instance. * @see #getVersionedBytes() */ - @Expose - private byte[] bytes; + @Expose private byte[] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList<>(0); /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index ad793209d5e1..0b077b724786 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -24,10 +24,10 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * Immutable POJO class for representing a table name. Which is of the form: <table @@ -94,22 +94,14 @@ public static boolean isMetaTableName(final TableName tn) { */ public static final TableName OLD_META_TABLE_NAME = getADummyTableName(OLD_META_STR); - @Expose - private final byte[] name; - @Expose - private final String nameAsString; - @Expose - private final byte[] namespace; - @Expose - private final String namespaceAsString; - @Expose - private final byte[] qualifier; - @Expose - private final String qualifierAsString; - @Expose - private final boolean systemTable; - @Expose - private final int hashCode; + @Expose private final byte[] name; + @Expose private final String nameAsString; + @Expose private final byte[] namespace; + @Expose private final String namespaceAsString; + @Expose private final byte[] qualifier; + @Expose private final String qualifierAsString; + @Expose private final boolean systemTable; + @Expose private final int hashCode; /** * Check passed byte array, "tableName", is legal user-space table name. diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index 9b9d74b21985..5b35bfbd0edb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -21,11 +21,11 @@ import java.util.Iterator; import java.util.List; import org.apache.commons.lang3.StringUtils; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.net.HostAndPort; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; /** * An immutable type to hold a hostname and port combo, like an Endpoint or @@ -38,8 +38,7 @@ */ @InterfaceAudience.Public public class Address implements Comparable
{ - @Expose - private final HostAndPort hostAndPort; + @Expose private final HostAndPort hostAndPort; private Address(HostAndPort hostAndPort) { this.hostAndPort = hostAndPort; diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java index 072b2e171858..dca25ffde417 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java @@ -524,12 +524,10 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo String ZEROCOPY_BYTES_READ_DESC = "The number of bytes read through HDFS zero copy"; String LOCAL_RACK_BYTES_READ = "localRackBytesRead"; - String LOCAL_RACK_BYTES_READ_DESC = - "The number of bytes read from the same rack of the RegionServer, but not the local HDFS DataNode"; + String LOCAL_RACK_BYTES_READ_DESC = "The number of bytes read from the same rack of the RegionServer, but not the local HDFS DataNode"; String REMOTE_RACK_BYTES_READ = "remoteRackBytesRead"; - String REMOTE_RACK_BYTES_READ_DESC = - "The number of bytes read from a different rack from that of the RegionServer"; + String REMOTE_RACK_BYTES_READ_DESC = "The number of bytes read from a different rack from that of the RegionServer"; String BLOCKED_REQUESTS_COUNT = "blockedRequestCount"; String BLOCKED_REQUESTS_COUNT_DESC = "The number of blocked requests because of memstore size is " diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java index e6402b847015..1526ca3f5a8c 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java @@ -553,10 +553,8 @@ private MetricsRecordBuilder addGaugesToMetricsRecordBuilder(MetricsRecordBuilde PERCENT_FILES_LOCAL_SECONDARY_REGIONS_DESC), rsWrap.getPercentFileLocalSecondaryRegions()) .addGauge(Interns.info(TOTAL_BYTES_READ, TOTAL_BYTES_READ_DESC), rsWrap.getTotalBytesRead()) .addGauge(Interns.info(LOCAL_BYTES_READ, LOCAL_BYTES_READ_DESC), rsWrap.getLocalBytesRead()) - .addGauge(Interns.info(LOCAL_RACK_BYTES_READ, LOCAL_RACK_BYTES_READ_DESC), - rsWrap.getLocalRackBytesRead()) - .addGauge(Interns.info(REMOTE_RACK_BYTES_READ, REMOTE_RACK_BYTES_READ_DESC), - rsWrap.getRemoteRackBytesRead()) + .addGauge(Interns.info(LOCAL_RACK_BYTES_READ, LOCAL_RACK_BYTES_READ_DESC), rsWrap.getLocalRackBytesRead()) + .addGauge(Interns.info(REMOTE_RACK_BYTES_READ, REMOTE_RACK_BYTES_READ_DESC), rsWrap.getRemoteRackBytesRead()) .addGauge(Interns.info(SHORTCIRCUIT_BYTES_READ, SHORTCIRCUIT_BYTES_READ_DESC), rsWrap.getShortCircuitBytesRead()) .addGauge(Interns.info(ZEROCOPY_BYTES_READ, ZEROCOPY_BYTES_READ_DESC), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java index aa7b4dee32fc..da970f9d2f6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; +import org.apache.hbase.thirdparty.io.netty.handler.ssl.util.LazyX509Certificate; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,7 +75,6 @@ import org.apache.hbase.thirdparty.io.netty.handler.ssl.OptionalSslHandler; import org.apache.hbase.thirdparty.io.netty.handler.ssl.SslContext; import org.apache.hbase.thirdparty.io.netty.handler.ssl.SslHandler; -import org.apache.hbase.thirdparty.io.netty.handler.ssl.util.LazyX509Certificate; import org.apache.hbase.thirdparty.io.netty.util.concurrent.GlobalEventExecutor; /** @@ -456,8 +456,7 @@ static void sslHandshakeCompleteHandler(NettyServerRpcConnection conn, SslHandle if (certificates != null && certificates.length > 0) { X509Certificate[] x509Certificates = new X509Certificate[certificates.length]; for (int i = 0; i < certificates.length; i++) { - // Hack to work around https://github.com/netty/netty/issues/13796, remove once HBase uses - // Netty 4.1.107.Final or later + // Hack to work around https://github.com/netty/netty/issues/13796, remove once HBase uses Netty 4.1.107.Final or later if (certificates[i] instanceof LazyX509Certificate) { Method method = LazyX509Certificate.class.getDeclaredMethod("unwrap"); method.setAccessible(true); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 2bd3b8f08932..de1cb5793017 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -34,12 +34,11 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; - /** * An efficient array based implementation similar to ClusterState for keeping the status of the * cluster in terms of region assignment and distribution. LoadBalancers, such as @@ -54,112 +53,68 @@ class BalancerClusterState { private static final Logger LOG = LoggerFactory.getLogger(BalancerClusterState.class); - @Expose - ServerName[] servers; + @Expose ServerName[] servers; // ServerName uniquely identifies a region server. multiple RS can run on the same host - @Expose - String[] hosts; - @Expose - String[] racks; - @Expose - boolean multiServersPerHost = false; // whether or not any host has more than one server - - @Expose - ArrayList tables; - @Expose - RegionInfo[] regions; - @Expose - Deque[] regionLoads; + @Expose String[] hosts; + @Expose String[] racks; + @Expose boolean multiServersPerHost = false; // whether or not any host has more than one server + + @Expose ArrayList tables; + @Expose RegionInfo[] regions; + @Expose Deque[] regionLoads; private RegionLocationFinder regionFinder; - @Expose - int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality - - @Expose - int[] serverIndexToHostIndex; // serverIndex -> host index - @Expose - int[] serverIndexToRackIndex; // serverIndex -> rack index - - @Expose - int[][] regionsPerServer; // serverIndex -> region list - @Expose - int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list - @Expose - int[][] regionsPerHost; // hostIndex -> list of regions - @Expose - int[][] regionsPerRack; // rackIndex -> region list - @Expose - Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated + @Expose int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality + + @Expose int[] serverIndexToHostIndex; // serverIndex -> host index + @Expose int[] serverIndexToRackIndex; // serverIndex -> rack index + + @Expose int[][] regionsPerServer; // serverIndex -> region list + @Expose int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list + @Expose int[][] regionsPerHost; // hostIndex -> list of regions + @Expose int[][] regionsPerRack; // rackIndex -> region list + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated // replicas by primary region index - @Expose - Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by // primary region index - @Expose - Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by // primary region index - @Expose - int[][] serversPerHost; // hostIndex -> list of server indexes - @Expose - int[][] serversPerRack; // rackIndex -> list of server indexes - @Expose - int[] regionIndexToServerIndex; // regionIndex -> serverIndex - @Expose - int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) - @Expose - int[] regionIndexToTableIndex; // regionIndex -> tableIndex - @Expose - int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions - @Expose - int[] numRegionsPerTable; // tableIndex -> region count - @Expose - int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS - @Expose - int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary - @Expose - boolean hasRegionReplicas = false; // whether there is regions with replicas - - @Expose - Integer[] serverIndicesSortedByRegionCount; - @Expose - Integer[] serverIndicesSortedByLocality; - - @Expose - Map serversToIndex; - @Expose - Map hostsToIndex; - @Expose - Map racksToIndex; - @Expose - Map tablesToIndex; - @Expose - Map regionsToIndex; - @Expose - float[] localityPerServer; - - @Expose - int numServers; - @Expose - int numHosts; - @Expose - int numRacks; - @Expose - int numTables; - @Expose - int numRegions; - - @Expose - int numMovedRegions = 0; // num moved regions from the initial configuration - @Expose - Map> clusterState; + @Expose int[][] serversPerHost; // hostIndex -> list of server indexes + @Expose int[][] serversPerRack; // rackIndex -> list of server indexes + @Expose int[] regionIndexToServerIndex; // regionIndex -> serverIndex + @Expose int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) + @Expose int[] regionIndexToTableIndex; // regionIndex -> tableIndex + @Expose int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions + @Expose int[] numRegionsPerTable; // tableIndex -> region count + @Expose int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + @Expose int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary + @Expose boolean hasRegionReplicas = false; // whether there is regions with replicas + + @Expose Integer[] serverIndicesSortedByRegionCount; + @Expose Integer[] serverIndicesSortedByLocality; + + @Expose Map serversToIndex; + @Expose Map hostsToIndex; + @Expose Map racksToIndex; + @Expose Map tablesToIndex; + @Expose Map regionsToIndex; + @Expose float[] localityPerServer; + + @Expose int numServers; + @Expose int numHosts; + @Expose int numRacks; + @Expose int numTables; + @Expose int numRegions; + + @Expose int numMovedRegions = 0; // num moved regions from the initial configuration + @Expose Map> clusterState; private final RackManager rackManager; // Maps region -> rackIndex -> locality of region on rack - @Expose - private float[][] rackLocalities; + @Expose private float[][] rackLocalities; // Maps localityType -> region -> [server|rack]Index with highest locality - @Expose - private int[][] regionsToMostLocalEntities; + @Expose private int[][] regionsToMostLocalEntities; static class DefaultRackManager extends RackManager { @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 10492c8ccb52..8f3e5f575913 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -35,14 +35,12 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; import org.apache.hbase.thirdparty.com.google.common.collect.Sets; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; -@InterfaceAudience.Private -class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { +@InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { private static final int NO_SERVER = -1; private static final int NO_REGION = -1; private static final boolean DEBUG_MAJOR = false; @@ -51,8 +49,7 @@ class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); - @Override - BalanceAction generate(BalancerClusterState cluster) { + @Override BalanceAction generate(BalancerClusterState cluster) { if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { return BalanceAction.NULL_ACTION; } @@ -65,9 +62,7 @@ BalanceAction generate(BalancerClusterState cluster) { int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; Arrays.stream(cluster.regions) - .flatMap(region -> HubSpotCellCostFunction - .toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT) - .stream()) + .flatMap(region -> HubSpotCellCostFunction.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT).stream()) .forEach(cellOnRegion -> cellCounts[cellOnRegion]++); double[] cellPercents = new double[HubSpotCellCostFunction.MAX_CELL_COUNT]; for (int i = 0; i < cellCounts.length; i++) { @@ -75,55 +70,52 @@ BalanceAction generate(BalancerClusterState cluster) { } List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length) - .mapToObj(serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])) - .collect(Collectors.toList()); + IntStream.range(0, cluster.regionsPerServer.length).mapToObj( + serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); return generateAction(cluster, cellCounts, cellGroupSizesPerServer); } - private BalanceAction generateAction(BalancerClusterState cluster, int[] cellCounts, - List> cellGroupSizesPerServer) { - int targetRegionsPerServer = - Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - List regionCounts = Arrays.stream(cluster.regionsPerServer) - .map(regions -> regions.length).collect(Collectors.toList()); + private BalanceAction generateAction( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer + ) { + int targetRegionsPerServer = Ints.checkedCast( + (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + List regionCounts = Arrays.stream(cluster.regionsPerServer).map(regions -> regions.length) + .collect(Collectors.toList()); - List> bigServers = cellGroupSizesPerServer.stream() - .filter(e -> e.keySet().size() > 7).collect(Collectors.toList()); + List> bigServers = + cellGroupSizesPerServer.stream().filter(e -> e.keySet().size() > 7) + .collect(Collectors.toList()); Map collective = new HashMap<>(); bigServers.forEach(e -> e.forEach((k, v) -> collective.merge(k, v, Integer::sum))); List underloadedServers = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1) - .boxed().collect(Collectors.toList()); + .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1).boxed() + .collect(Collectors.toList()); - // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within - // plus/minus 1 of the target + // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within plus/minus 1 of the target if (!underloadedServers.isEmpty()) { List serversThatCanLoseOneRegion = IntStream.range(0, cluster.numServers) .filter(server -> cluster.regionsPerServer[server].length >= targetRegionsPerServer).boxed() .collect(Collectors.toList()); - return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, - cellGroupSizesPerServer, cluster); + return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, cellGroupSizesPerServer, cluster); } // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps - Pair cellOnServer = - pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); + Pair cellOnServer = pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); if (cellOnServer.getSecond() != NO_SERVER) { return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); } - // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller - // disconnected pieces - // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can - // either swap - // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that - // subsequent rounds + // Step 3: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces + // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can either swap + // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that subsequent rounds // will use step (1) to repair the imbalance. cellOnServer = pickLeastFrequentCellOnMostLoadedServer(cellGroupSizesPerServer, cellCounts, cluster); @@ -132,36 +124,47 @@ private BalanceAction generateAction(BalancerClusterState cluster, int[] cellCou return BalanceAction.NULL_ACTION; } - BalanceAction swapAttempt = - giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); + BalanceAction swapAttempt = giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); if (swapAttempt != BalanceAction.NULL_ACTION) { return swapAttempt; } - return giveAwaySomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cellCounts, cluster); + return giveAwaySomeRegionToImprove( + cellOnServer, + cellGroupSizesPerServer, + cellCounts, + cluster + ); } private Pair pickSecondMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, int[] cellCounts, - BalancerClusterState cluster) { - return IntStream.range(0, cluster.numServers).boxed() + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { + return IntStream.range(0, cluster.numServers) + .boxed() .filter(server -> cellGroupSizesPerServer.get(server).size() > 1) - .map( - server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) - .sorted(Comparator - .comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) - .findFirst().orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); + .map(server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) + .sorted(Comparator.comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) + .findFirst() + .orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); } private short get2ndMostFrequentCell(Map countOfCells) { short mostFrequent = pickMostFrequentCell(countOfCells); - return countOfCells.keySet().stream().filter(cell -> cell != mostFrequent) - .max(Comparator.comparing(countOfCells::get)).get(); + return countOfCells.keySet().stream() + .filter(cell -> cell != mostFrequent) + .max(Comparator.comparing(countOfCells::get)) + .get(); } - private BalanceAction giveAwayRegionViaSwap(Pair cellOnServer, - List> cellGroupSizesPerServer, BalancerClusterState cluster) { + private BalanceAction giveAwayRegionViaSwap( + Pair cellOnServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { short sourceCell = cellOnServer.getFirst(); int sourceServer = cellOnServer.getSecond(); @@ -169,10 +172,11 @@ private BalanceAction giveAwayRegionViaSwap(Pair cellOnServer, Set sourceCells = sourceCellCounts.keySet(); Optional otherServerWithSharedCellAndMostOfTheCellToGiveAway = - IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) + IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> server != sourceServer) .filter(server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell)) - .filter(server -> Sets - .intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) + .filter(server -> Sets.intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(sourceCell))); if (!otherServerWithSharedCellAndMostOfTheCellToGiveAway.isPresent()) { @@ -188,11 +192,13 @@ private BalanceAction giveAwayRegionViaSwap(Pair cellOnServer, return swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); } - private BalanceAction moveRegionFromOverloadedToUnderloaded(List overloadedServers, - List underloadedServers, List> cellGroupSizesPerServer, - BalancerClusterState cluster) { - List overloadedServersMostToLeastCells = overloadedServers.stream() - .sorted( + private BalanceAction moveRegionFromOverloadedToUnderloaded( + List overloadedServers, + List underloadedServers, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + List overloadedServersMostToLeastCells = overloadedServers.stream().sorted( Comparator.comparing(server -> -1 * cellGroupSizesPerServer.get(server).keySet().size())) .collect(Collectors.toList()); // if there's a server w/ excess that has a single instance of a cell that we already have, @@ -212,8 +218,12 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded(List overlo short cellToMove = singletonCellsOnSourceWeCanMoveToTarget.get( ThreadLocalRandom.current().nextInt(singletonCellsOnSourceWeCanMoveToTarget.size())); - return getAction(source, pickRegionForCell(cellsByRegionOnSource, cellToMove), target, - -1); + return getAction( + source, + pickRegionForCell(cellsByRegionOnSource, cellToMove), + target, + -1 + ); } } } @@ -221,22 +231,23 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded(List overlo int target = underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); - // if there's a server w/ excess that has a singleton cell we don't have but only one instance, - // accept it + // if there's a server w/ excess that has a singleton cell we don't have but only one instance, accept it // (0, neutral) for (int source : overloadedServersMostToLeastCells) { Map cellCountsOnServer = cellGroupSizesPerServer.get(source); short leastFrequentCell = pickLeastFrequentCell(cellCountsOnServer); if (cellCountsOnServer.get(leastFrequentCell) == 1) { - return getAction(source, - pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), - leastFrequentCell), - target, NO_REGION); + return getAction( + source, + pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions), leastFrequentCell), + target, + NO_REGION + ); } } - // ok, we give up. just pick a random region from the least loaded cell of some instance and - // call it a day + + // ok, we give up. just pick a random region from the least loaded cell of some instance and call it a day // this will be (-1) but allows balancing to continue int source = overloadedServersMostToLeastCells.get( ThreadLocalRandom.current().nextInt(Math.min(overloadedServersMostToLeastCells.size(), 5))); @@ -244,13 +255,18 @@ private BalanceAction moveRegionFromOverloadedToUnderloaded(List overlo Multimap cellsByRegionForSource = computeCellsByRegion(cluster.regionsPerServer[source], cluster.regions); - return getAction(source, pickRegionForCell(cellsByRegionForSource, cellToMove), target, - NO_REGION); + return getAction(source, + pickRegionForCell(cellsByRegionForSource, cellToMove), target, + NO_REGION + ); } - private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnServer, - List> cellGroupSizesPerServer, int[] cellCounts, - BalancerClusterState cluster) { + private BalanceAction giveAwaySomeRegionToImprove( + Pair cellOnServer, + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { short cell = cellOnServer.getFirst(); int sourceServer = cellOnServer.getSecond(); @@ -258,8 +274,10 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer Map cellCountsOnSource = cellGroupSizesPerServer.get(sourceServer); Set cellsOnSource = cellCountsOnSource.keySet(); - Optional otherServerWithThisCell = - pickOtherServerWithThisCellToGiveItTo(cell, sourceServer, cellGroupSizesPerServer, cluster); + + Optional otherServerWithThisCell = pickOtherServerWithThisCellToGiveItTo( + cell, sourceServer, cellGroupSizesPerServer, cluster + ); int targetServer = NO_SERVER; @@ -278,10 +296,12 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer return BalanceAction.NULL_ACTION; } - MoveRegionAction action = (MoveRegionAction) getAction(sourceServer, - pickRegionForCell( - computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), - targetServer, NO_REGION); + MoveRegionAction action = (MoveRegionAction) getAction( + sourceServer, + pickRegionForCell(computeCellsByRegion(cluster.regionsPerServer[sourceServer], cluster.regions), cell), + targetServer, + NO_REGION + ); if (LOG.isDebugEnabled() || DEBUG_MINOR) { Map cellsOnTarget = cellGroupSizesPerServer.get(targetServer); @@ -295,33 +315,41 @@ private BalanceAction giveAwaySomeRegionToImprove(Pair cellOnSer boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" - : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" - : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" - : "BAD"; - - // System.out.printf( - // "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is - // %s\n", - // action.getFromServer(), - // action.getRegion(), - // action.getToServer(), - // cell, - // cellCountsOnSource.get(cell), - // cellsOnTarget.get(cell), - // descrOfQuality - // ); - LOG.debug( - "Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", - action.getFromServer(), action.getRegion(), action.getToServer(), cell, - cellCountsOnSource.get(cell), cellsOnTarget.get(cell), descrOfQuality); + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + +// System.out.printf( +// "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", +// action.getFromServer(), +// action.getRegion(), +// action.getToServer(), +// cell, +// cellCountsOnSource.get(cell), +// cellsOnTarget.get(cell), +// descrOfQuality +// ); + LOG.debug("Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", + action.getFromServer(), + action.getRegion(), + action.getToServer(), + cell, + cellCountsOnSource.get(cell), + cellsOnTarget.get(cell), + descrOfQuality + ); } return action; } - private Optional pickOtherLowerLoadedServerToGiveCell(int sourceServer, - List> cellGroupSizesPerServer, BalancerClusterState cluster) { + private Optional pickOtherLowerLoadedServerToGiveCell( + int sourceServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { List serversByCellCountAsc = IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) @@ -349,16 +377,23 @@ private Optional pickOtherLowerLoadedServerToGiveCell(int sourceServer, return Optional.of(serverToPick).filter(server -> server != NO_SERVER); } - private Optional pickOtherServerWithThisCellToGiveItTo(short cell, int sourceServer, - List> cellGroupSizesPerServer, BalancerClusterState cluster) { - return IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) + private Optional pickOtherServerWithThisCellToGiveItTo( + short cell, + int sourceServer, + List> cellGroupSizesPerServer, + BalancerClusterState cluster + ) { + return IntStream.range(0, cluster.numServers) + .boxed() + .filter(server -> server != sourceServer) .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) - .filter(server -> cluster.regionsPerServer[server].length - <= Math.ceil((double) cluster.numRegions / cluster.numServers)) + .filter(server -> cluster.regionsPerServer[server].length <= Math.ceil((double) cluster.numRegions / cluster.numServers)) .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cell))); } - private short pickLeastFrequentCell(Map cellCounts) { + private short pickLeastFrequentCell( + Map cellCounts + ) { short cellToPick = -1; int lowestCountSoFar = Integer.MAX_VALUE; double reservoirRandom = -1; @@ -381,7 +416,9 @@ private short pickLeastFrequentCell(Map cellCounts) { return cellToPick; } - private short pickMostFrequentCell(Map cellCounts) { + private short pickMostFrequentCell( + Map cellCounts + ) { short cellToPick = -1; int highestCountSoFar = Integer.MIN_VALUE; double reservoirRandom = -1; @@ -421,14 +458,12 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, } Set sourceCandidateSet = new HashSet<>(); - for (int sourceServerCandidate = 0; sourceServerCandidate - < cellGroupSizesPerServer.size(); sourceServerCandidate++) { + for (int sourceServerCandidate = 0; sourceServerCandidate < cellGroupSizesPerServer.size(); sourceServerCandidate++) { if (sourceServerCandidate == targetServer) { continue; } - Map cellsOnSourceCandidate = - cellGroupSizesPerServer.get(sourceServerCandidate); + Map cellsOnSourceCandidate = cellGroupSizesPerServer.get(sourceServerCandidate); // if that server is perfectly isolated, don't allow that to be broken even to fix another if (cellsOnSourceCandidate.keySet().size() == 1) { @@ -444,54 +479,57 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, if (cellsInCommon.size() > 1) { short commonCellToSwap = cellsInCommon.stream().filter(cell -> cell != sourceCell).findAny().get(); - SwapRegionsAction action = - swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); + SwapRegionsAction action = swapCells(sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSourceCandidate.size(); - int sourceNewTotal = - cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); + int sourceNewTotal = cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = cellCountsOnTargetServer.size() - - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); + int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" - : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) - ? "GOOD" - : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" - : "BAD"; - - // System.out.printf( - // "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), - // " - // + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - // action.getFromServer(), - // action.getFromRegion(), - // action.getToServer(), - // action.getToRegion(), - // commonCellToSwap, - // cellCountsOnTargetServer.get(commonCellToSwap), - // sourceCell, - // cellCountsOnTargetServer.get(sourceCell), - // sourceCell, - // cellsOnSourceCandidate.get(sourceCell), - // commonCellToSwap, - // cellsOnSourceCandidate.get(commonCellToSwap), - // descrOfQuality - // ); - LOG.debug( - "Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + +// System.out.printf( +// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " +// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", +// action.getFromServer(), +// action.getFromRegion(), +// action.getToServer(), +// action.getToRegion(), +// commonCellToSwap, +// cellCountsOnTargetServer.get(commonCellToSwap), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellsOnSourceCandidate.get(sourceCell), +// commonCellToSwap, +// cellsOnSourceCandidate.get(commonCellToSwap), +// descrOfQuality +// ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), action.getFromRegion(), action.getToServer(), - action.getToRegion(), commonCellToSwap, - cellCountsOnTargetServer.get(commonCellToSwap), sourceCell, - cellCountsOnTargetServer.get(sourceCell), sourceCell, - cellsOnSourceCandidate.get(sourceCell), commonCellToSwap, - cellsOnSourceCandidate.get(commonCellToSwap), descrOfQuality); + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + commonCellToSwap, + cellCountsOnTargetServer.get(commonCellToSwap), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSourceCandidate.get(sourceCell), + commonCellToSwap, + cellsOnSourceCandidate.get(commonCellToSwap), + descrOfQuality + ); } return action; } @@ -507,63 +545,76 @@ private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, int sourceServer = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); Map cellsOnSource = cellGroupSizesPerServer.get(sourceServer); - short targetCell = cellsOnTargetServer.stream().filter(cell -> cell != sourceCell) - .sorted(Comparator.comparing(cellCountsOnTargetServer::get)).findFirst().get(); + short targetCell = cellsOnTargetServer.stream() + .filter(cell -> cell != sourceCell) + .sorted(Comparator.comparing(cellCountsOnTargetServer::get)) + .findFirst() + .get(); - SwapRegionsAction action = - swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); + SwapRegionsAction action = swapCells(sourceServer, sourceCell, targetServer, targetCell, cluster); if (LOG.isDebugEnabled() || DEBUG_MAJOR) { int sourceOldTotal = cellsOnSource.size(); int sourceNewTotal = cellsOnSource.size() - (cellsOnSource.get(sourceCell) == 1 ? 1 : 0); int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = - cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); + int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); boolean sourceImproves = sourceNewTotal < sourceOldTotal; boolean targetImproves = targetNewTotal < targetOldTotal; boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; boolean targetStaysSame = targetOldTotal == targetNewTotal; - String descrOfQuality = (sourceImproves && targetImproves) ? "GREAT" - : ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" - : (sourceStaysSame && targetStaysSame) ? "NEUTRAL" - : "BAD"; - - // System.out.printf( - // "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " - // + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", - // action.getFromServer(), - // action.getFromRegion(), - // action.getToServer(), - // action.getToRegion(), - // sourceCell, - // cellCountsOnTargetServer.get(sourceCell), - // sourceCell, - // cellCountsOnTargetServer.get(sourceCell), - // sourceCell, - // cellsOnSource.get(sourceCell), - // sourceCell, - // cellsOnSource.get(sourceCell), - // descrOfQuality - // ); - LOG.debug( - "Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + String descrOfQuality = + (sourceImproves && targetImproves) ? "GREAT" : + ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : + (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : + "BAD"; + +// System.out.printf( +// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " +// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", +// action.getFromServer(), +// action.getFromRegion(), +// action.getToServer(), +// action.getToRegion(), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellCountsOnTargetServer.get(sourceCell), +// sourceCell, +// cellsOnSource.get(sourceCell), +// sourceCell, +// cellsOnSource.get(sourceCell), +// descrOfQuality +// ); + LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), action.getFromRegion(), action.getToServer(), action.getToRegion(), - sourceCell, cellCountsOnTargetServer.get(sourceCell), sourceCell, - cellCountsOnTargetServer.get(sourceCell), sourceCell, cellsOnSource.get(sourceCell), - sourceCell, cellsOnSource.get(sourceCell), descrOfQuality); + action.getFromServer(), + action.getFromRegion(), + action.getToServer(), + action.getToRegion(), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellCountsOnTargetServer.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), + sourceCell, + cellsOnSource.get(sourceCell), + descrOfQuality + ); } return action; } - private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, - BalancerClusterState cluster) { - return (SwapRegionsAction) getAction(fromServer, - resolveCellToRegion(cluster, fromServer, fromCell), toServer, - resolveCellToRegion(cluster, toServer, toCell)); + private SwapRegionsAction swapCells(int fromServer, short fromCell, int toServer, short toCell, BalancerClusterState cluster) { + return (SwapRegionsAction) getAction( + fromServer, + resolveCellToRegion(cluster, fromServer, fromCell), + toServer, + resolveCellToRegion(cluster, toServer, toCell) + ); } private int resolveCellToRegion(BalancerClusterState cluster, int server, short cell) { @@ -572,18 +623,22 @@ private int resolveCellToRegion(BalancerClusterState cluster, int server, short return pickRegionForCell(cellsByRegion, cell); } - private SwapRegionsAction swap(int receivingServer, short cellToGiveToReceivingServer, - int offeringServer, short cellToOfferFromReceivingServerToOrigin, - BalancerClusterState cluster) { + private SwapRegionsAction swap( + int receivingServer, + short cellToGiveToReceivingServer, + int offeringServer, + short cellToOfferFromReceivingServerToOrigin, + BalancerClusterState cluster + ) { Multimap cellsByRegionForReceivingServer = computeCellsByRegion(cluster.regionsPerServer[receivingServer], cluster.regions); Multimap cellsByRegionForOfferingServer = computeCellsByRegion(cluster.regionsPerServer[offeringServer], cluster.regions); - return (SwapRegionsAction) getAction(offeringServer, - pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), - receivingServer, - pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin)); + return (SwapRegionsAction) getAction( + offeringServer, pickRegionForCell(cellsByRegionForOfferingServer, cellToGiveToReceivingServer), + receivingServer, pickRegionForCell(cellsByRegionForReceivingServer, cellToOfferFromReceivingServerToOrigin) + ); } private int pickRegionForCell(Multimap cellsByRegionOnServer, short cellToMove) { @@ -595,16 +650,14 @@ private int pickRegionForCell(Multimap cellsByRegionOnServer, sh static List computeCellsPerRs(BalancerClusterState cluster) { List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length) - .mapToObj(serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])) - .collect(Collectors.toList()); + IntStream.range(0, cluster.regionsPerServer.length).mapToObj( + serverIndex -> computeCellGroupSizes(cluster, serverIndex, + cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); } private Pair pickMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, int[] cellCounts, - BalancerClusterState cluster) { + List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { cluster.sortServersByRegionCount(); int[][] regionsPerServer = cluster.regionsPerServer; @@ -619,26 +672,26 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( int[] regionsForServer = regionsPerServer[serverIndex]; Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); - Set cellsOnThisServerAndOthers = cellsOnServer.keySet().stream() - .filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]).collect(Collectors.toSet()); + Set cellsOnThisServerAndOthers = + cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) + .collect(Collectors.toSet()); - if ( - cellsOnServer.keySet().size() <= targetCellsPerServer - // if we have a small cell where the entire cell is local, we MUST have at least 2 cells - // on this server to have - // an overall region balance, so allow us to go over the target by 1 cell - || cellsOnThisServerAndOthers.size() == 1 - ) { + if (cellsOnServer.keySet().size() <= targetCellsPerServer + // if we have a small cell where the entire cell is local, we MUST have at least 2 cells on this server to have + // an overall region balance, so allow us to go over the target by 1 cell + || cellsOnThisServerAndOthers.size() == 1) { continue; } - List> cellsByFrequencyAsc = cellsOnServer.entrySet().stream() - .sorted(Map.Entry.comparingByValue()).collect(Collectors.toList()); + List> cellsByFrequencyAsc = + cellsOnServer.entrySet().stream().sorted(Map.Entry.comparingByValue()) + .collect(Collectors.toList()); if (cellsByFrequencyAsc.isEmpty()) { continue; } + int probe = cellsByFrequencyAsc.size() - 1; short mostFrequentCellTemp = -1; int mostFrequentCellCountTemp = -1; @@ -648,7 +701,7 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( mostFrequentCellTemp = entry.getKey(); mostFrequentCellCountTemp = entry.getValue(); probe--; - } while (mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); + } while(mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); final short mostFrequentCell = mostFrequentCellTemp; final int mostFrequentCellCount = mostFrequentCellCountTemp; @@ -661,8 +714,7 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( long numServersWithMostFrequentCellNotSaturated = cellGroupSizesPerServer.stream().filter(cellMap -> cellMap.containsKey(mostFrequentCell)) .filter(cellMap -> cellMap.keySet().size() > 1).count(); - // if we're down to only one server unsaturated with the most frequent cell, there are no good - // swaps + // if we're down to only one server unsaturated with the most frequent cell, there are no good swaps if (numServersWithMostFrequentCellNotSaturated == 1) { continue; } @@ -692,8 +744,10 @@ private Pair pickMostFrequentCellOnAnyUnsaturatedServer( } private Pair pickLeastFrequentCellOnMostLoadedServer( - List> cellGroupSizesPerServer, int[] cellCounts, - BalancerClusterState cluster) { + List> cellGroupSizesPerServer, + int[] cellCounts, + BalancerClusterState cluster + ) { int targetCellsPerServer = Ints.checkedCast( (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); @@ -701,16 +755,14 @@ private Pair pickLeastFrequentCellOnMostLoadedServer( .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) .collect(Collectors.toList()).get(cluster.numServers - 1); - Map cellCountsForHighestLoadedServer = - cellGroupSizesPerServer.get(highestLoadedServer); + Map cellCountsForHighestLoadedServer = cellGroupSizesPerServer.get(highestLoadedServer); int numCellsOnHighestLoadedServer = cellCountsForHighestLoadedServer.keySet().size(); if (numCellsOnHighestLoadedServer <= targetCellsPerServer + 1) { return Pair.newPair((short) -1, -1); } - return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), - highestLoadedServer); + return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), highestLoadedServer); } private static Map computeCellGroupSizes(BalancerClusterState cluster, @@ -740,16 +792,16 @@ private static Map computeCellGroupSizes(BalancerClusterState cl byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - short startCellId = (startKey == null || startKey.length == 0) - ? 0 - : (startKey.length >= 2 - ? Bytes.toShort(startKey, 0, 2) - : Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) - ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) - : (endKey.length >= 2 - ? Bytes.toShort(endKey, 0, 2) - : Bytes.toShort(new byte[] { -1, endKey[0] })); + short startCellId = (startKey == null || startKey.length == 0) ? + 0 : + (startKey.length >= 2 ? + Bytes.toShort(startKey, 0, 2) : + Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) ? + (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (endKey.length >= 2 ? + Bytes.toShort(endKey, 0, 2) : + Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; @@ -793,16 +845,16 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio byte[] startKey = region.getStartKey(); byte[] endKey = region.getEndKey(); - short startCellId = (startKey == null || startKey.length == 0) - ? 0 - : (startKey.length >= 2 - ? Bytes.toShort(startKey, 0, 2) - : Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) - ? (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) - : (endKey.length >= 2 - ? Bytes.toShort(endKey, 0, 2) - : Bytes.toShort(new byte[] { -1, endKey[0] })); + short startCellId = (startKey == null || startKey.length == 0) ? + 0 : + (startKey.length >= 2 ? + Bytes.toShort(startKey, 0, 2) : + Bytes.toShort(new byte[] { 0, startKey[0] })); + short endCellId = (endKey == null || endKey.length == 0) ? + (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (endKey.length >= 2 ? + Bytes.toShort(endKey, 0, 2) : + Bytes.toShort(new byte[] { -1, endKey[0] })); if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 8f7593019f1c..27c95460894b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -36,7 +36,6 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; @@ -68,10 +67,8 @@ public class HubSpotCellCostFunction extends CostFunction { private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - static class Int2IntCounterMapAdapter - implements JsonSerializer, JsonDeserializer { - @Override - public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, + static class Int2IntCounterMapAdapter implements JsonSerializer, JsonDeserializer { + @Override public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, JsonSerializationContext context) { JsonObject obj = new JsonObject(); @@ -102,8 +99,7 @@ public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, return obj; } - @Override - public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, + @Override public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { JsonObject obj = json.getAsJsonObject(); @@ -149,47 +145,53 @@ public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, } } - static final Gson OBJECT_MAPPER = - new GsonBuilder().excludeFieldsWithoutExposeAnnotation().enableComplexMapKeySerialization() - .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) - .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { - JsonObject obj = json.getAsJsonObject(); + static final Gson OBJECT_MAPPER = new GsonBuilder() + .excludeFieldsWithoutExposeAnnotation() + .enableComplexMapKeySerialization() + .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) + .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { + JsonObject obj = json.getAsJsonObject(); - boolean split = obj.get("split").getAsBoolean(); - long regionId = obj.get("regionId").getAsLong(); - int replicaId = obj.get("replicaId").getAsInt(); - JsonObject tableName = obj.get("tableName").getAsJsonObject(); - JsonArray startKey = obj.get("startKey").getAsJsonArray(); - JsonArray endKey = obj.get("endKey").getAsJsonArray(); + boolean split = obj.get("split").getAsBoolean(); + long regionId = obj.get("regionId").getAsLong(); + int replicaId = obj.get("replicaId").getAsInt(); + JsonObject tableName = obj.get("tableName").getAsJsonObject(); + JsonArray startKey = obj.get("startKey").getAsJsonArray(); + JsonArray endKey = obj.get("endKey").getAsJsonArray(); - byte[] startKeyBytes = new byte[startKey.size()]; - byte[] endKeyBytes = new byte[endKey.size()]; + byte[] startKeyBytes = new byte[startKey.size()]; + byte[] endKeyBytes = new byte[endKey.size()]; - for (int i = 0; i < startKey.size(); i++) { - startKeyBytes[i] = startKey.get(i).getAsByte(); - } - for (int i = 0; i < endKey.size(); i++) { - endKeyBytes[i] = endKey.get(i).getAsByte(); - } + for (int i = 0; i < startKey.size(); i++) { + startKeyBytes[i] = startKey.get(i).getAsByte(); + } + for (int i = 0; i < endKey.size(); i++) { + endKeyBytes[i] = endKey.get(i).getAsByte(); + } - TableName tb = TableName.valueOf(tableName.get("namespaceAsString").getAsString(), - tableName.get("qualifierAsString").getAsString()); + TableName tb = TableName.valueOf( + tableName.get("namespaceAsString").getAsString(), + tableName.get("qualifierAsString").getAsString() + ); - RegionInfo result = RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) + RegionInfo result = + RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) .setReplicaId(replicaId).setStartKey(startKeyBytes).setEndKey(endKeyBytes).build(); - return result; - }).addDeserializationExclusionStrategy(new ExclusionStrategy() { - @Override - public boolean shouldSkipField(FieldAttributes f) { - return f.getName().equals("serversToIndex") || f.getName().equals("regionsToIndex") - || f.getName().equals("clusterState"); - } + return result; + }) + .addDeserializationExclusionStrategy(new ExclusionStrategy() { + @Override public boolean shouldSkipField(FieldAttributes f) { + return f.getName().equals("serversToIndex") + || f.getName().equals("regionsToIndex") + || f.getName().equals("clusterState") + ; + } - @Override - public boolean shouldSkipClass(Class clazz) { - return false; - } - }).create(); + @Override public boolean shouldSkipClass(Class clazz) { + return false; + } + }) + .create(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; // hack - hard code this for now static final short MAX_CELL_COUNT = 360; @@ -217,9 +219,10 @@ void prepare(BalancerClusterState cluster) { servers = cluster.servers; super.prepare(cluster); - if ( - LOG.isTraceEnabled() && cluster.tables.contains("objects-3") && cluster.regions != null - && cluster.regions.length > 0 + if (LOG.isTraceEnabled() + && cluster.tables.contains("objects-3") + && cluster.regions != null + && cluster.regions.length > 0 ) { try { LOG.trace("{} cluster state:\n{}", cluster.tables, OBJECT_MAPPER.toJson(cluster)); @@ -231,38 +234,43 @@ void prepare(BalancerClusterState cluster) { this.serverHasCell = new boolean[numServers][numCells]; this.bestCaseMaxCellsPerServer = (int) Math.min(1, Math.ceil((double) numCells / numServers)); this.numRegionCellsOverassigned = - calculateCurrentCellCost(numCells, numServers, bestCaseMaxCellsPerServer, regions, - regionIndexToServerIndex, serverHasCell, super.cluster::getRegionSizeMB); - - if ( - regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") - && LOG.isTraceEnabled() + calculateCurrentCellCost( + numCells, + numServers, + bestCaseMaxCellsPerServer, + regions, regionIndexToServerIndex, + serverHasCell, + super.cluster::getRegionSizeMB + ); + + if (regions.length > 0 + && regions[0].getTable().getNamespaceAsString().equals("default") + && LOG.isTraceEnabled() ) { - LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), - snapshotState()); + LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), snapshotState()); } } - @Override - boolean isNeeded() { + @Override boolean isNeeded() { return cluster.tables.stream().anyMatch(name -> name.contains("objects-3")); } - @Override - protected void regionMoved(int region, int oldServer, int newServer) { + @Override protected void regionMoved(int region, int oldServer, int newServer) { RegionInfo movingRegion = regions[region]; if (!movingRegion.getTable().getNamespaceAsString().equals("default")) { return; } - Set cellsOnRegion = - toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + Set cellsOnRegion = toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); if (LOG.isDebugEnabled()) { - LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", region, - Bytes.toHex(movingRegion.getStartKey()), Bytes.toHex(movingRegion.getEndKey()), - cellsOnRegion); + LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", + region, + Bytes.toHex(movingRegion.getStartKey()), + Bytes.toHex(movingRegion.getEndKey()), + cellsOnRegion + ); } Map numRegionsForCellOnOldServer = computeCellFrequencyForServer(oldServer); @@ -274,8 +282,13 @@ protected void regionMoved(int region, int oldServer, int newServer) { if (LOG.isDebugEnabled()) { LOG.debug( "Old server {} [{}] has cell frequency of {}.\n\nNew server {} [{}] has cell frequency of {}.", - oldServer, currentCellCountOldServer, numRegionsForCellOnOldServer, newServer, - currentCellCountNewServer, numRegionsForCellOnNewServer); + oldServer, + currentCellCountOldServer, + numRegionsForCellOnOldServer, + newServer, + currentCellCountNewServer, + numRegionsForCellOnNewServer + ); } int changeInOverassignedRegionCells = 0; @@ -300,8 +313,7 @@ protected void regionMoved(int region, int oldServer, int newServer) { } if (LOG.isDebugEnabled()) { - LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, - changeInOverassignedRegionCells); + LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, changeInOverassignedRegionCells); } numRegionCellsOverassigned += changeInOverassignedRegionCells; @@ -316,7 +328,10 @@ private Map computeCellFrequencyForServer(int server) { cellsInRegion.forEach(cell -> regionsByCell.put(cell, regionIndex)); } - return regionsByCell.build().asMap().entrySet().stream() + return regionsByCell.build() + .asMap() + .entrySet() + .stream() .collect(ImmutableMap.toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().size())); } @@ -376,9 +391,15 @@ protected double cost() { return numRegionCellsOverassigned; } - static int calculateCurrentCellCost(short numCells, int numServers, int bestCaseMaxCellsPerServer, - RegionInfo[] regions, int[] regionLocations, boolean[][] serverHasCell, - Function getRegionSizeMbFunc) { + static int calculateCurrentCellCost( + short numCells, + int numServers, + int bestCaseMaxCellsPerServer, + RegionInfo[] regions, + int[] regionLocations, + boolean[][] serverHasCell, + Function getRegionSizeMbFunc + ) { Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); @@ -444,8 +465,12 @@ static int calculateCurrentCellCost(short numCells, int numServers, int bestCase return cost; } - private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, byte[] endKey, - short numCells) { + private static void setCellsForServer( + boolean[] serverHasCell, + byte[] startKey, + byte[] endKey, + short numCells + ) { short startCellId = (startKey == null || startKey.length == 0) ? 0 : (startKey.length >= 2 @@ -477,8 +502,7 @@ private static void setCellsForServer(boolean[] serverHasCell, byte[] startKey, } static boolean isStopExclusive(byte[] endKey) { - return endKey != null && endKey.length == 2 - || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); + return endKey != null && endKey.length == 2 || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); } static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index 6d3857944f7c..85b15599e580 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -250,12 +250,10 @@ protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) try { TableDescriptor tableDescriptor = getTableDescriptor(region.getTable()); if (tableDescriptor != null) { - LOG.debug("Region {} is located on {}", regionNameAsString, - tableDescriptor.getTableName().getNameAsString()); + LOG.debug("Region {} is located on {}", regionNameAsString, tableDescriptor.getTableName().getNameAsString()); HDFSBlocksDistribution blocksDistribution = HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region); - LOG.debug("Top hosts for region {}: {}", regionNameAsString, - blocksDistribution.getTopHosts()); + LOG.debug("Top hosts for region {}: {}", regionNameAsString, blocksDistribution.getTopHosts()); return blocksDistribution; } } catch (IOException ioe) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 3dae3fe0bd6e..c7f872fc0844 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -234,8 +234,7 @@ protected List createCandidateGenerators() { candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), new RegionReplicaRackCandidateGenerator()); - candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), - new HubSpotCellBasedCandidateGenerator()); + candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); return candidateGenerators; } @@ -565,8 +564,7 @@ protected List balanceTable(TableName tableName, LOG.info( "[{}] Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "functionCost={} computedMaxSteps={}", - tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), - computedMaxSteps); + tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), computedMaxSteps); final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. @@ -584,9 +582,9 @@ protected List balanceTable(TableName tableName, newCost = computeCost(cluster, currentCost); - if (LOG.isTraceEnabled()) { - LOG.trace("S[{}]: {} -> {} via {} -- {}", step, currentCost, newCost, action, - totalCostsPerFunc()); + if(LOG.isTraceEnabled()) { + LOG.trace("S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); } // Should this be kept? @@ -620,16 +618,15 @@ protected List balanceTable(TableName tableName, + " to try {} different iterations. Found a solution that moves " + "{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. funtionCost={}", - tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), - initCost / sumMultiplier, currentCost / sumMultiplier, functionCost()); + tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), initCost / sumMultiplier, + currentCost / sumMultiplier, functionCost()); sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); return plans; } LOG.info( "[{}] Could not find a better moving plan. Tried {} different configurations in " + "{} ms, and did not find anything with an imbalance score less than {}", - tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, - initCost / sumMultiplier); + tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, initCost / sumMultiplier); return null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java deleted file mode 100644 index e73b979356d8..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.normalizer; - -import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.hbase.client.TableDescriptor; -import org.apache.hadoop.hbase.util.Bytes; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -final class HubspotCellAwareNormalizer extends SimpleRegionNormalizer { - private static final Logger LOG = LoggerFactory.getLogger(HubspotCellAwareNormalizer.class); - - @Override - public List computePlansForTable(TableDescriptor tableDescriptor) { - List allPlans = super.computePlansForTable(tableDescriptor); - List filteredPlans = new ArrayList<>(allPlans.size()); - - for (NormalizationPlan plan : allPlans) { - boolean shouldInclude = shouldIncludePlan(plan); - if (shouldInclude) { - filteredPlans.add(plan); - } else { - LOG.info("Skipping plan: {}", plan); - } - } - - return filteredPlans; - } - - private static boolean shouldIncludePlan(NormalizationPlan plan) { - switch (plan.getType()) { - case MERGE: - return shouldIncludeMergePlan((MergeNormalizationPlan) plan); - case NONE: - case SPLIT: - return true; - default: - throw new RuntimeException("Unknown plan type: " + plan.getType()); - } - } - - private static boolean shouldIncludeMergePlan(MergeNormalizationPlan plan) { - List targets = plan.getNormalizationTargets(); - - if (targets.size() <= 1) { - return true; - } - - byte[] endKey = targets.get(0).getRegionInfo().getEndKey(); - short cell = Bytes.toShort(endKey); - - for (int i = 1; i < targets.size(); ++i) { - endKey = targets.get(i).getRegionInfo().getEndKey(); - if (cell != Bytes.toShort(endKey)) { - return false; - } - } - - return true; - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index 71fe20be79c6..f97622b40631 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -18,10 +18,12 @@ package org.apache.hadoop.hbase.master.normalizer; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -56,12 +58,9 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio */ private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer - // Class balancerKlass = - // conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - // RegionNormalizer.class); - // return ReflectionUtils.newInstance(balancerKlass, conf); - - // HACK - return new HubspotCellAwareNormalizer(); + Class balancerKlass = + conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + RegionNormalizer.class); + return ReflectionUtils.newInstance(balancerKlass, conf); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index cbbf33edadba..fa9f358883ec 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -105,13 +105,20 @@ public void testCellCountBothEndsNull() { @Test public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, 1, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[] { 0, 1, 2, 3 }, - new boolean[][] { { false, false, false, false }, { false, false, false, false }, - { false, false, false, false }, { false, false, false, false } }, - ALL_REGIONS_SIZE_1_MB); + int cost = HubSpotCellCostFunction.calculateCurrentCellCost + ((short) 4, + 4, + 1, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[] { 0 , 1 , 2 , 3 }, + new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, + ALL_REGIONS_SIZE_1_MB + ); assertEquals(0, cost); } @@ -119,12 +126,18 @@ public void testCostBalanced() { @Test public void testCostImbalanced() { // 4 cells, 4 servers, imbalanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost((short) 4, 4, 1, - new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), - buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, - new int[] { 0, 0, 0, 0 }, - new boolean[][] { { false, false, false, false }, { false, false, false, false }, - { false, false, false, false }, { false, false, false, false } }, + int cost = HubSpotCellCostFunction.calculateCurrentCellCost( + (short) 4, + 4, + 1, + new RegionInfo[] { + buildRegionInfo(null, (short) 1), + buildRegionInfo((short) 1, (short) 2), + buildRegionInfo((short) 2, (short) 3), + buildRegionInfo((short) 3, null) + }, + new int[] { 0 , 0 , 0 , 0 }, + new boolean[][] {{false, false, false, false}, {false, false, false, false}, {false, false, false, false}, {false, false, false, false}}, ALL_REGIONS_SIZE_1_MB); assertTrue(cost > 0); } From 3526567dc20471139acbfe8274623391db80bc61 Mon Sep 17 00:00:00 2001 From: Hernan Gelaf-Romer Date: Tue, 26 Nov 2024 16:16:53 -0500 Subject: [PATCH 084/126] add hubspot normalizer --- .../HubspotCellAwareNormalizer.java | 78 +++++++++++++++++++ .../normalizer/RegionNormalizerFactory.java | 14 ++-- 2 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java new file mode 100644 index 000000000000..e73b979356d8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.normalizer; + +import java.util.ArrayList; +import java.util.List; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.util.Bytes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class HubspotCellAwareNormalizer extends SimpleRegionNormalizer { + private static final Logger LOG = LoggerFactory.getLogger(HubspotCellAwareNormalizer.class); + + @Override + public List computePlansForTable(TableDescriptor tableDescriptor) { + List allPlans = super.computePlansForTable(tableDescriptor); + List filteredPlans = new ArrayList<>(allPlans.size()); + + for (NormalizationPlan plan : allPlans) { + boolean shouldInclude = shouldIncludePlan(plan); + if (shouldInclude) { + filteredPlans.add(plan); + } else { + LOG.info("Skipping plan: {}", plan); + } + } + + return filteredPlans; + } + + private static boolean shouldIncludePlan(NormalizationPlan plan) { + switch (plan.getType()) { + case MERGE: + return shouldIncludeMergePlan((MergeNormalizationPlan) plan); + case NONE: + case SPLIT: + return true; + default: + throw new RuntimeException("Unknown plan type: " + plan.getType()); + } + } + + private static boolean shouldIncludeMergePlan(MergeNormalizationPlan plan) { + List targets = plan.getNormalizationTargets(); + + if (targets.size() <= 1) { + return true; + } + + byte[] endKey = targets.get(0).getRegionInfo().getEndKey(); + short cell = Bytes.toShort(endKey); + + for (int i = 1; i < targets.size(); ++i) { + endKey = targets.get(i).getRegionInfo().getEndKey(); + if (cell != Bytes.toShort(endKey)) { + return false; + } + } + + return true; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index f97622b40631..fb071b5438fa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -18,12 +18,10 @@ package org.apache.hadoop.hbase.master.normalizer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; -import org.apache.hadoop.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -58,9 +56,13 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio */ private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer - Class balancerKlass = - conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - RegionNormalizer.class); - return ReflectionUtils.newInstance(balancerKlass, conf); + // Class balancerKlass = + // conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + // RegionNormalizer.class); + // return ReflectionUtils.newInstance(balancerKlass, conf); + + // HACK + return new HubspotCellAwareNormalizer(); } } + From 83dad7ff1cb397febad2b3ebc699c2b289097a73 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 26 Nov 2024 18:05:19 -0500 Subject: [PATCH 085/126] Prioritize spreading cells out --- .../HubSpotCellBasedCandidateGenerator.java | 918 +++--------------- .../balancer/HubSpotCellCostFunction.java | 16 +- 2 files changed, 142 insertions(+), 792 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 206ad2d7fa31..b7c94f815737 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -17,9 +17,7 @@ */ package org.apache.hadoop.hbase.master.balancer; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -38,14 +36,10 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.Multimap; -import org.apache.hbase.thirdparty.com.google.common.collect.Sets; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; @InterfaceAudience.Private class HubSpotCellBasedCandidateGenerator extends CandidateGenerator { - private static final int NO_SERVER = -1; private static final int NO_REGION = -1; - private static final boolean DEBUG_MAJOR = false; - private static final boolean DEBUG_MINOR = false; private static final Logger LOG = LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); @@ -72,8 +66,7 @@ List> cellGroupSizesPerServer = IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + serverIndex -> computeCellGroupSizes(cluster, cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); return generateAction(cluster, cellCounts, cellGroupSizesPerServer); } @@ -84,673 +77,168 @@ private BalanceAction generateAction( List> cellGroupSizesPerServer ) { int targetRegionsPerServer = Ints.checkedCast( - (long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - List regionCounts = Arrays.stream(cluster.regionsPerServer).map(regions -> regions.length) - .collect(Collectors.toList()); - - List> bigServers = - cellGroupSizesPerServer.stream().filter(e -> e.keySet().size() > 7) - .collect(Collectors.toList()); - Map collective = new HashMap<>(); - bigServers.forEach(e -> e.forEach((k, v) -> collective.merge(k, v, Integer::sum))); - - List underloadedServers = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length < targetRegionsPerServer - 1).boxed() - .collect(Collectors.toList()); - - // Step 1: if a previous action unbalanced us, try to rebalance region balance to be within plus/minus 1 of the target - if (!underloadedServers.isEmpty()) { - List serversThatCanLoseOneRegion = IntStream.range(0, cluster.numServers) - .filter(server -> cluster.regionsPerServer[server].length >= targetRegionsPerServer).boxed() - .collect(Collectors.toList()); - - return moveRegionFromOverloadedToUnderloaded(serversThatCanLoseOneRegion, underloadedServers, cellGroupSizesPerServer, cluster); - } - - // Step 3: balanced regions, so let's take cells spread over many servers and collect them to one - BalanceAction action = moveCellOnMultipleServersFromLowLoadedServerToBetterOne(cellGroupSizesPerServer, cellCounts, cluster); + (long) Math.floor((double) cluster.numRegions / cluster.numServers)); - if (action.getType() != BalanceAction.Type.NULL) { - return action; + int numTimesCellRegionsFillAllServers = 0; + for (int cell = 0; cell < HubSpotCellCostFunction.MAX_CELL_COUNT; cell++) { + int numRegionsForCell = cellCounts[cell]; + numTimesCellRegionsFillAllServers += Ints.checkedCast((long) Math.floor((double) numRegionsForCell / cluster.numServers)); } - // Step 2: knowing we have region balance, try to expand the highest frequency cell(s) via swaps - Pair cellOnServer = pickMostFrequentCellOnAnyUnsaturatedServer(cellGroupSizesPerServer, cellCounts, cluster); + int targetCellsPerServer = targetRegionsPerServer - numTimesCellRegionsFillAllServers; + BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); - if (cellOnServer.getSecond() != NO_SERVER) { - return swapSomeRegionToImprove(cellOnServer, cellGroupSizesPerServer, cluster); + if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { + return moveRegionToUnderloadedServer; } - // Step 4: balanced regions, and many/most servers are full now. We have a lot of smaller disconnected pieces - // left to sort out. Pick the most loaded server, and try to reduce the cell count by 1. We can either swap - // if possible, or give away if not. We're allowed to slightly imbalance here, knowing that subsequent rounds - // will use step (1) to repair the imbalance. - cellOnServer = - pickLeastFrequentCellOnMostLoadedServer(cellGroupSizesPerServer, cellCounts, cluster); - if (cellOnServer.getSecond() == NO_SERVER) { - return BalanceAction.NULL_ACTION; - } - - BalanceAction swapAttempt = giveAwayRegionViaSwap(cellOnServer, cellGroupSizesPerServer, cluster); - - if (swapAttempt != BalanceAction.NULL_ACTION) { - return swapAttempt; - } - - return giveAwaySomeRegionToImprove( - cellOnServer, - cellGroupSizesPerServer, - cellCounts, - cluster - ); + return swapRegionsToIncreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); } - private BalanceAction moveCellOnMultipleServersFromLowLoadedServerToBetterOne( + private BalanceAction swapRegionsToIncreaseDistinctCellsPerServer( + BalancerClusterState cluster, + int[] cellCounts, List> cellGroupSizesPerServer, - int[] numRegiosnPerCell, - BalancerClusterState cluster + int targetCellsPerServer ) { - // if there are 2 servers that aren't "full" with this cell, we can move all regions for it off - // the lower load one, onto the higher load one - List candidateCells = - IntStream.range(0, HubSpotCellCostFunction.MAX_CELL_COUNT).mapToObj(cell -> (short) cell) - .filter(cell -> { - long numSwapCandidates = IntStream.range(0, cluster.numServers).boxed().filter( - server -> isCellOnServerGoodSwapCandidate(server, cell, numRegiosnPerCell, - cellGroupSizesPerServer)).count(); - // if there are 2 servers that aren't "full" with this cell, we can move all regions for it off - // the lower load one, onto the higher load one - return numSwapCandidates > 1; - }) - .collect(Collectors.toList()); - - - List lightestCandidates = new ArrayList<>(); - int lowestCountSoFar = Integer.MAX_VALUE; - for (short cell : candidateCells) { - int lowestInstanceCountForCell = IntStream.range(0, cluster.numServers) - .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) - .map(server -> cellGroupSizesPerServer.get(server).get(cell)).min().getAsInt(); - - if (lowestInstanceCountForCell < lowestCountSoFar) { - lightestCandidates = new ArrayList<>(); - lightestCandidates.add(cell); - lowestCountSoFar = lowestInstanceCountForCell; - } else if (lowestInstanceCountForCell == lowestCountSoFar) { - lightestCandidates.add(cell); - } - } - - Collections.shuffle(lightestCandidates); - - if (lightestCandidates.isEmpty()) { + Optional fromServerMaybe = pickServerWithoutEnoughIsolation(cluster, cellGroupSizesPerServer, targetCellsPerServer); + if (!fromServerMaybe.isPresent()) { return BalanceAction.NULL_ACTION; } + int fromServer = fromServerMaybe.get(); + short fromCell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); - for (int i = 0; i < lightestCandidates.size(); i++) { - short sourceCell = lightestCandidates.get(i); - List sourceCandidates = IntStream.range(0, cluster.numServers).boxed().filter( - server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell) - && cellGroupSizesPerServer.get(server).keySet().size() > 1 - && cellGroupSizesPerServer.get(server).get(sourceCell) != numRegiosnPerCell[sourceCell]) - .collect(Collectors.toList()); - - if (sourceCandidates.isEmpty()) { - continue; - } - - int sourceServer = sourceCandidates.stream() - .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).size())) - .get(); - - int sourceCellCount = cellGroupSizesPerServer.get(sourceServer).get(sourceCell); - - Optional targetServerMaybe = IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> server != sourceServer) - .filter(server -> isCellOnServerGoodSwapCandidate(server, sourceCell, numRegiosnPerCell, cellGroupSizesPerServer)) - .filter(server -> cellGroupSizesPerServer.get(server).get(sourceCell) >= sourceCellCount) - .filter(server -> cellGroupSizesPerServer.get(server).size() > 2) - .filter(server -> cellGroupSizesPerServer.get(server).size() > sourceCellCount) - .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).entrySet().stream().filter(entry -> entry.getValue() == 1).count())); - - if (!targetServerMaybe.isPresent()) { - continue; - } - - int targetServer = targetServerMaybe.get(); - - Optional targetCellMaybe = - cellGroupSizesPerServer.get(targetServer).keySet().stream().filter(cell -> cell != sourceCell) - .min(Comparator.comparing(cellGroupSizesPerServer.get(targetServer)::get)); - - if (!targetCellMaybe.isPresent()) { - continue; - } - - short targetCell = targetCellMaybe.get(); - - int change = - (cellGroupSizesPerServer.get(sourceServer).getOrDefault(sourceCell, 0) == 1 ? -1 : 0) + - (cellGroupSizesPerServer.get(targetServer).getOrDefault(sourceCell, 0) == 0 ? 1 : 0) + - (cellGroupSizesPerServer.get(sourceServer).getOrDefault(targetCell, 0) == 0 ? 1 : 0) + - (cellGroupSizesPerServer.get(targetServer).getOrDefault(targetCell, 0) == 1 ? -1 : 0) - ; - - if (change >= 0) { - continue; - } - - return swapCells("sparse cells", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); - } - - return BalanceAction.NULL_ACTION; - } - - private boolean isCellOnServerGoodSwapCandidate( - int server, - short cell, - int[] numRegionsPerCell, - List> cellGroupSizesPerServer - ) { - Map cellCounts = cellGroupSizesPerServer.get(server); - if (!cellCounts.containsKey(cell)) { - return false; + Optional> toCellMaybe = pickCellOnServerNotPresentOnSource(cluster, cellCounts, cellGroupSizesPerServer, fromServer, fromCell); + if (!toCellMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; } - int numOtherCellsWithRepresentationElsewhere = Ints.checkedCast( - cellCounts.keySet().stream() - .filter(testCell -> testCell != cell) - .filter(testCell -> cellCounts.get(testCell) < numRegionsPerCell[testCell]) - .count()); - - if (numOtherCellsWithRepresentationElsewhere == 0) { - return false; - } + short toCell = toCellMaybe.get().getFirst(); + int toServer = toCellMaybe.get().getSecond(); - return true; + return swapCells("swap", fromServer, fromCell, toServer, toCell, cellGroupSizesPerServer, cluster); } - private Pair pickSecondMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, + private Optional> pickCellOnServerNotPresentOnSource( + BalancerClusterState cluster, int[] cellCounts, - BalancerClusterState cluster - ) { - return IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> cellGroupSizesPerServer.get(server).size() > 1) - .map(server -> Pair.newPair(get2ndMostFrequentCell(cellGroupSizesPerServer.get(server)), server)) - .sorted(Comparator.comparing(pair -> -1 * cellGroupSizesPerServer.get(pair.getSecond()).get(pair.getFirst()))) - .findFirst() - .orElseGet(() -> Pair.newPair((short) -1, NO_SERVER)); - } - - private short get2ndMostFrequentCell(Map countOfCells) { - short mostFrequent = pickMostFrequentCell(countOfCells); - return countOfCells.keySet().stream() - .filter(cell -> cell != mostFrequent) - .max(Comparator.comparing(countOfCells::get)) - .get(); - } - - private BalanceAction giveAwayRegionViaSwap( - Pair cellOnServer, List> cellGroupSizesPerServer, - BalancerClusterState cluster + int fromServer, + short cell ) { - short sourceCell = cellOnServer.getFirst(); - int sourceServer = cellOnServer.getSecond(); - - Map sourceCellCounts = cellGroupSizesPerServer.get(sourceServer); - Set sourceCells = sourceCellCounts.keySet(); - - Optional otherServerWithSharedCellAndMostOfTheCellToGiveAway = - IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> server != sourceServer) - .filter(server -> cellGroupSizesPerServer.get(server).containsKey(sourceCell)) - .filter(server -> Sets.intersection(cellGroupSizesPerServer.get(server).keySet(), sourceCells).size() > 1) - .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(sourceCell))); - - if (!otherServerWithSharedCellAndMostOfTheCellToGiveAway.isPresent()) { - return BalanceAction.NULL_ACTION; - } - - int targetServer = otherServerWithSharedCellAndMostOfTheCellToGiveAway.get(); - Map targetCells = cellGroupSizesPerServer.get(targetServer); - - short targetCell = targetCells.keySet().stream().filter(cell -> cell != sourceCell) - .filter(sourceCells::contains).findAny().get(); - - return swapCells("improve least loaded", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); - } + Map countsForFromServer = cellGroupSizesPerServer.get(fromServer); + Optional> result = Optional.empty(); - private BalanceAction moveRegionFromOverloadedToUnderloaded( - List overloadedServers, - List underloadedServers, - List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { - List overloadedServersMostToLeastCells = overloadedServers.stream().sorted( - Comparator.comparing(server -> -1 * cellGroupSizesPerServer.get(server).keySet().size())) - .collect(Collectors.toList()); - // if there's a server w/ excess that has a single instance of a cell that we already have, - // prioritize that first (easy -2) - for (int source : overloadedServersMostToLeastCells) { - for (int target : underloadedServers) { - Map cellsOnSource = cellGroupSizesPerServer.get(source); - Map cellsOnTarget = cellGroupSizesPerServer.get(target); - - List singletonCellsOnSourceWeCanMoveToTarget = - cellsOnSource.keySet().stream() - .filter(cell -> cellsOnSource.get(cell) == 1) - .filter(cellsOnTarget::containsKey) - .collect(Collectors.toList()); - - if (!singletonCellsOnSourceWeCanMoveToTarget.isEmpty()) { - short cellToMove = singletonCellsOnSourceWeCanMoveToTarget.get( - ThreadLocalRandom.current().nextInt(singletonCellsOnSourceWeCanMoveToTarget.size())); - - return moveCell("restore -1", source, cellToMove, target, cellGroupSizesPerServer, cluster); - } + double reservoirRandom = -1; + for (int server = 0; server < cluster.numServers; server++) { + if (server == fromServer) { + continue; } - } - - // if there's a server w/ a singleton that we don't already have, accept it - // prioritize that next (0) - for (int source : overloadedServersMostToLeastCells) { - for (int target : underloadedServers) { - Map cellsOnSource = cellGroupSizesPerServer.get(source); - - List cellsOnSourcePresentOnTarget = - cellsOnSource.keySet() - .stream() - .filter(cell -> cellsOnSource.get(cell) == 1) - .collect(Collectors.toList()); - if (!cellsOnSourcePresentOnTarget.isEmpty()) { - short cellToMove = cellsOnSourcePresentOnTarget.get(ThreadLocalRandom.current().nextInt(cellsOnSourcePresentOnTarget.size())); + Map countsForToCandidate = cellGroupSizesPerServer.get(server); + Set candidateCellsOnTo = new HashSet<>(); + for (short cellOnTo : countsForToCandidate.keySet()) { + int regionsForCell = cellCounts[cellOnTo]; + int expectedCountOnAllServers = Ints.checkedCast((long) Math.floor((double) regionsForCell / cluster.numServers)); - return moveCell("restore 0", source, cellToMove, target, cellGroupSizesPerServer, cluster); + if (!countsForFromServer.containsKey(cellOnTo) || countsForFromServer.get(cellOnTo) <= expectedCountOnAllServers) { + candidateCellsOnTo.add(cellOnTo); } } - } - - // if there's a server w/ excess that has more than one instance of a cell that we already have, - // prioritize that next (0) - for (int source : overloadedServersMostToLeastCells) { - for (int target : underloadedServers) { - Map cellsOnSource = cellGroupSizesPerServer.get(source); - Map cellsOnTarget = cellGroupSizesPerServer.get(target); - - List cellsOnSourcePresentOnTarget = - cellsOnSource.keySet() - .stream() - .filter(cellsOnTarget::containsKey) - .collect(Collectors.toList()); - if (!cellsOnSourcePresentOnTarget.isEmpty()) { - short cellToMove = cellsOnSourcePresentOnTarget.get(ThreadLocalRandom.current().nextInt(cellsOnSourcePresentOnTarget.size())); - - return moveCell("restore 0", source, cellToMove, target, cellGroupSizesPerServer, cluster); + if (!countsForToCandidate.containsKey(cell) && + !candidateCellsOnTo.isEmpty()) { + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(Pair.newPair(candidateCellsOnTo.stream().findAny().get(), server)); } } } - int target = - underloadedServers.get(ThreadLocalRandom.current().nextInt(underloadedServers.size())); - - - // ok, we give up. just pick a random region from the least loaded cell of some instance and call it a day - // this will be (+1) but allows balancing to continue - int source = overloadedServersMostToLeastCells.get( - ThreadLocalRandom.current().nextInt(Math.min(overloadedServersMostToLeastCells.size(), 5))); - short cellToMove = pickLeastFrequentCell(cellGroupSizesPerServer.get(source)); - - return moveCell("restore +1", source, cellToMove, target, cellGroupSizesPerServer, cluster); - } - - private BalanceAction giveAwaySomeRegionToImprove( - Pair cellOnServer, - List> cellGroupSizesPerServer, - int[] cellCounts, - BalancerClusterState cluster - ) { - - short cell = cellOnServer.getFirst(); - int sourceServer = cellOnServer.getSecond(); - - Map cellCountsOnSource = cellGroupSizesPerServer.get(sourceServer); - Set cellsOnSource = cellCountsOnSource.keySet(); - - - Optional otherServerWithThisCell = pickOtherServerWithThisCellToGiveItTo( - cell, sourceServer, cellGroupSizesPerServer, cluster - ); - - int targetServer = NO_SERVER; - - if (otherServerWithThisCell.isPresent()) { - targetServer = otherServerWithThisCell.get(); - } else { - Optional lowerLoadedServer = - pickOtherLowerLoadedServerToGiveCell(sourceServer, cellGroupSizesPerServer, cluster); - - if (lowerLoadedServer.isPresent()) { - targetServer = lowerLoadedServer.get(); - } - } - - if (targetServer == NO_SERVER) { - return BalanceAction.NULL_ACTION; - } - - MoveRegionAction action = moveCell("give away", sourceServer, cell, targetServer, cellGroupSizesPerServer, cluster); - - if (LOG.isDebugEnabled() || DEBUG_MINOR) { - Map cellsOnTarget = cellGroupSizesPerServer.get(targetServer); - int sourceOldTotal = cellsOnSource.size(); - int sourceNewTotal = cellsOnSource.size() - (cellCountsOnSource.get(cell) == 1 ? 1 : 0); - int targetOldTotal = cellsOnTarget.size(); - int targetNewTotal = cellsOnTarget.size() - (cellsOnTarget.get(cell) == 1 ? 1 : 0); - - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; - - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Moving s%d.r%d -> s%d [cell = %d]. SOURCE has %d copies, TARGET has %d copies. Change is %s\n", -// action.getFromServer(), -// action.getRegion(), -// action.getToServer(), -// cell, -// cellCountsOnSource.get(cell), -// cellsOnTarget.get(cell), -// descrOfQuality -// ); - LOG.debug("Moving s{}.r{} -> s{} [cell = {}]. SOURCE has {} copies, TARGET has {} copies. Change is {}", - action.getFromServer(), - action.getRegion(), - action.getToServer(), - cell, - cellCountsOnSource.get(cell), - cellsOnTarget.get(cell), - descrOfQuality - ); - } - - return action; + return result; } - private Optional pickOtherLowerLoadedServerToGiveCell( - int sourceServer, + private Optional pickServerWithoutEnoughIsolation( + BalancerClusterState cluster, List> cellGroupSizesPerServer, - BalancerClusterState cluster + int targetCellsPerServer ) { - List serversByCellCountAsc = - IntStream.range(0, cluster.numServers).boxed().filter(server -> server != sourceServer) - .sorted(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())) - .collect(Collectors.toList()); - - int serverToPick = NO_SERVER; - int lowestCountSoFar = Integer.MAX_VALUE; + Optional result = Optional.empty(); + int lowestSoFar = Integer.MAX_VALUE; double reservoirRandom = -1; - - for (int server : serversByCellCountAsc) { - int cellCount = cellGroupSizesPerServer.get(server).keySet().size(); - if (cellCount < lowestCountSoFar) { - serverToPick = server; - lowestCountSoFar = cellCount; - reservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (cellCount == lowestCountSoFar) { - double serverRandom = ThreadLocalRandom.current().nextDouble(); - if (serverRandom > reservoirRandom) { - serverToPick = server; - reservoirRandom = serverRandom; + for (int server = 0; server < cluster.numServers; server++) { + int numCellsOnServer = cellGroupSizesPerServer.get(server).keySet().size(); + if (numCellsOnServer < targetCellsPerServer) { + if (numCellsOnServer < lowestSoFar) { + lowestSoFar = numCellsOnServer; + reservoirRandom = ThreadLocalRandom.current().nextDouble(); + result = Optional.of(server); + } else if (numCellsOnServer == lowestSoFar) { + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(server); + } } } } - return Optional.of(serverToPick).filter(server -> server != NO_SERVER); + return result; } - private Optional pickOtherServerWithThisCellToGiveItTo( - short cell, - int sourceServer, + private BalanceAction tryMoveRegionToSomeUnderloadedServer( + BalancerClusterState cluster, + int[] cellCounts, List> cellGroupSizesPerServer, - BalancerClusterState cluster - ) { - return IntStream.range(0, cluster.numServers) - .boxed() - .filter(server -> server != sourceServer) - .filter(server -> cellGroupSizesPerServer.get(server).containsKey(cell)) - .filter(server -> cluster.regionsPerServer[server].length <= Math.ceil((double) cluster.numRegions / cluster.numServers)) - .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).get(cell))); - } - - private short pickLeastFrequentCell( - Map cellCounts + int targetRegionsPerServer ) { - short cellToPick = -1; - int lowestCountSoFar = Integer.MAX_VALUE; - double reservoirRandom = -1; - - for (short cell : cellCounts.keySet()) { - int count = cellCounts.get(cell); - if (count < lowestCountSoFar) { - cellToPick = cell; - lowestCountSoFar = count; - reservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (count == lowestCountSoFar) { - double cellRandom = ThreadLocalRandom.current().nextDouble(); - if (cellRandom > reservoirRandom) { - cellToPick = cell; - reservoirRandom = cellRandom; - } - } + Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer); + if (!toServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; } - return cellToPick; - } - - private short pickMostFrequentCell( - Map cellCounts - ) { - short cellToPick = -1; - int highestCountSoFar = Integer.MIN_VALUE; - double reservoirRandom = -1; - - for (short cell : cellCounts.keySet()) { - int count = cellCounts.get(cell); - if (count > highestCountSoFar) { - cellToPick = cell; - highestCountSoFar = count; - reservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (count == highestCountSoFar) { - double cellRandom = ThreadLocalRandom.current().nextDouble(); - if (cellRandom > reservoirRandom) { - cellToPick = cell; - reservoirRandom = cellRandom; - } - } + int toServer = toServerMaybe.get(); + Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer); + if (!fromServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; } + int fromServer = fromServerMaybe.get(); + short cell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); - return cellToPick; + return moveCell("fill underloaded", fromServer, cell, toServer, cellGroupSizesPerServer, cluster); } - private BalanceAction swapSomeRegionToImprove(Pair cellOnServer, - List> cellGroupSizesPerServer, BalancerClusterState cluster) { - - short sourceCell = cellOnServer.getFirst(); - int targetServer = cellOnServer.getSecond(); + private short pickCellToMove(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { + return cellCountsForServer.keySet().stream() + .max(Comparator.comparing(cell -> { + int regionsForCell = cellCounts[cell]; + int expectedCountOnAllServers = Ints.checkedCast((long) Math.floor((double) regionsForCell / cluster.numServers)); - Map cellCountsOnTargetServer = cellGroupSizesPerServer.get(targetServer); - Set cellsOnTargetServer = cellCountsOnTargetServer.keySet(); + return cellCountsForServer.get(cell) - expectedCountOnAllServers; + })) + .get(); + } - if (cluster.regionsPerServer[targetServer].length == 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("{} has no regions", targetServer); + private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + for (int server = 0; server < cluster.numServers; server++) { + if (cluster.regionsPerServer[server].length > targetRegionsPerServer) { + return Optional.of(server); } - return BalanceAction.NULL_ACTION; } - Set sourceCandidateSet = new HashSet<>(); - for (int sourceServerCandidate = 0; sourceServerCandidate < cellGroupSizesPerServer.size(); sourceServerCandidate++) { - if (sourceServerCandidate == targetServer) { - continue; - } - - Map cellsOnSourceCandidate = cellGroupSizesPerServer.get(sourceServerCandidate); + return Optional.empty(); + } - // if that server is perfectly isolated, don't allow that to be broken even to fix another - if (cellsOnSourceCandidate.keySet().size() == 1) { - continue; + private Optional pickUnderloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + for (int server = 0; server < cluster.numServers; server++) { + if (cluster.regionsPerServer[server].length < targetRegionsPerServer) { + return Optional.of(server); } - - if (cellsOnSourceCandidate.containsKey(sourceCell)) { - sourceCandidateSet.add(sourceServerCandidate); - - Sets.SetView cellsInCommon = - Sets.intersection(cellsOnTargetServer, cellsOnSourceCandidate.keySet()); - - if (cellsInCommon.size() > 1) { - short commonCellToSwap = - cellsInCommon.stream().filter(cell -> cell != sourceCell).findAny().get(); - SwapRegionsAction action = swapCells("improve frequent 1", sourceServerCandidate, sourceCell, targetServer, commonCellToSwap, cellGroupSizesPerServer, cluster); - if (LOG.isDebugEnabled() || DEBUG_MAJOR) { - int sourceOldTotal = cellsOnSourceCandidate.size(); - int sourceNewTotal = cellsOnSourceCandidate.size() - (cellsOnSourceCandidate.get(sourceCell) == 1 ? 1 : 0); - int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(commonCellToSwap) == 1 ? 1 : 0); - - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; - - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " -// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", -// action.getFromServer(), -// action.getFromRegion(), -// action.getToServer(), -// action.getToRegion(), -// commonCellToSwap, -// cellCountsOnTargetServer.get(commonCellToSwap), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellsOnSourceCandidate.get(sourceCell), -// commonCellToSwap, -// cellsOnSourceCandidate.get(commonCellToSwap), -// descrOfQuality -// ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " - + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - commonCellToSwap, - cellCountsOnTargetServer.get(commonCellToSwap), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSourceCandidate.get(sourceCell), - commonCellToSwap, - cellsOnSourceCandidate.get(commonCellToSwap), - descrOfQuality - ); - } - return action; - } - } - } - - List candidates = new ArrayList<>(sourceCandidateSet); - - if (candidates.isEmpty()) { - // this means we've reached the end of the road for this particular cell - return BalanceAction.NULL_ACTION; - } - - int sourceServer = candidates.get(ThreadLocalRandom.current().nextInt(candidates.size())); - Map cellsOnSource = cellGroupSizesPerServer.get(sourceServer); - short targetCell = cellsOnTargetServer.stream() - .filter(cell -> cell != sourceCell) - .sorted(Comparator.comparing(cellCountsOnTargetServer::get)) - .findFirst() - .get(); - - SwapRegionsAction action = swapCells("improve frequent 2", sourceServer, sourceCell, targetServer, targetCell, cellGroupSizesPerServer, cluster); - - if (LOG.isDebugEnabled() || DEBUG_MAJOR) { - int sourceOldTotal = cellsOnSource.size(); - int sourceNewTotal = cellsOnSource.size() - (cellsOnSource.get(sourceCell) == 1 ? 1 : 0); - int targetOldTotal = cellsOnTargetServer.size(); - int targetNewTotal = cellCountsOnTargetServer.size() - (cellCountsOnTargetServer.get(sourceCell) == 1 ? 1 : 0); - - boolean sourceImproves = sourceNewTotal < sourceOldTotal; - boolean targetImproves = targetNewTotal < targetOldTotal; - boolean sourceStaysSame = sourceOldTotal == sourceNewTotal; - boolean targetStaysSame = targetOldTotal == targetNewTotal; - - String descrOfQuality = - (sourceImproves && targetImproves) ? "GREAT" : - ((sourceStaysSame && targetImproves) || (sourceImproves && targetStaysSame)) ? "GOOD" : - (sourceStaysSame && targetStaysSame) ? "NEUTRAL" : - "BAD"; - -// System.out.printf( -// "Swapping s%d.r%d for s%d.r%d. SOURCE loses %d (%d copies) and gains %d (%d copies), " -// + "TARGET loses %d (%d copies) and gains %d (%d copies). Change is %s\n", -// action.getFromServer(), -// action.getFromRegion(), -// action.getToServer(), -// action.getToRegion(), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellCountsOnTargetServer.get(sourceCell), -// sourceCell, -// cellsOnSource.get(sourceCell), -// sourceCell, -// cellsOnSource.get(sourceCell), -// descrOfQuality -// ); - LOG.debug("Swapping s{}.r{} to s{}.r{}. SOURCE loses {} ({} copies) and gains {} ({} copies), " - + "TARGET loses {} ({} copies) and gains {} ({} copies). Change is {}", - action.getFromServer(), - action.getFromRegion(), - action.getToServer(), - action.getToRegion(), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellCountsOnTargetServer.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - sourceCell, - cellsOnSource.get(sourceCell), - descrOfQuality - ); } - return action; + return Optional.empty(); } private MoveRegionAction moveCell( @@ -760,27 +248,22 @@ private MoveRegionAction moveCell( List> cellGroupSizesPerServer, BalancerClusterState cluster ) { - Map fromCounts = cellGroupSizesPerServer.get(fromServer); - Map toCounts = cellGroupSizesPerServer.get(toServer); - - String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + - fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "")) - .collect(Collectors.joining(", ", "{", "}")); - String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + - toCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "!!" : "")) - .collect(Collectors.joining(", ", "{", "}")); - - int fromEmptiesFromCell = fromCounts.get(fromCell) == 1 ? -1 : 0; - int toGainsNewCell = toCounts.getOrDefault(fromCell, 0) == 0 ? 1 : 0; - - int change = fromEmptiesFromCell + toGainsNewCell; - - System.out.printf("[%20s]\t\t%2d\tmove %d:%d -> %d %s -> %s\n", - originStep, - change, - fromServer, fromCell, - toServer, fromCountsString, toCountsString - ); + if (LOG.isDebugEnabled()) { + Map fromCounts = cellGroupSizesPerServer.get(fromServer); + Map toCounts = cellGroupSizesPerServer.get(toServer); + + String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + + fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "")) + .collect(Collectors.joining(", ", "{", "}")); + String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + + toCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + + LOG.debug("{}", String.format("[%20s]\t\tmove %d:%d -> %d %s -> %s\n", + originStep, + fromServer, fromCell, + toServer, fromCountsString, toCountsString)); + } return (MoveRegionAction) getAction(fromServer, resolveCellToRegion(cluster, fromServer, fromCell), toServer, NO_REGION); } @@ -792,29 +275,22 @@ private SwapRegionsAction swapCells( List> cellGroupSizesPerServer, BalancerClusterState cluster ) { - Map fromCounts = cellGroupSizesPerServer.get(fromServer); - Map toCounts = cellGroupSizesPerServer.get(toServer); - - String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + - fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "")) - .collect(Collectors.joining(", ", "{", "}")); - String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + - toCounts.entrySet().stream().map(entry -> (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "")) - .collect(Collectors.joining(", ", "{", "}")); - - int fromEmptiesFromCell = fromCounts.get(fromCell) == 1 ? -1 : 0; - int fromGainsNewCell = fromCounts.getOrDefault(toCell, 0) == 0 ? 1 : 0; - int toEmptiesToCell = toCounts.get(toCell) == 1 ? -1 : 0; - int toGainsNewCell = toCounts.getOrDefault(fromCell, 0) == 0 ? 1 : 0; - - int change = fromEmptiesFromCell + fromGainsNewCell + toEmptiesToCell + toGainsNewCell; - - System.out.printf("[%20s]\t\t%2d\tswap %3d:%3d <-> %3d:%3d %s <-> %s\n", - originStep, - change, - fromServer, fromCell, - toServer, toCell, fromCountsString, toCountsString - ); + if (LOG.isDebugEnabled()) { + Map fromCounts = cellGroupSizesPerServer.get(fromServer); + Map toCounts = cellGroupSizesPerServer.get(toServer); + + String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + + fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + + toCounts.entrySet().stream().map(entry -> (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "")) + .collect(Collectors.joining(", ", "{", "}")); + + LOG.debug("{}", String.format("[%20s]\t\tswap %3d:%3d <-> %3d:%3d %s <-> %s\n", + originStep, + fromServer, fromCell, + toServer, toCell, fromCountsString, toCountsString)); + } return (SwapRegionsAction) getAction( fromServer, @@ -840,133 +316,11 @@ private int pickRegionForCell(Multimap cellsByRegionOnServer, sh static List computeCellsPerRs(BalancerClusterState cluster) { List> cellGroupSizesPerServer = IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, serverIndex, - cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + serverIndex -> computeCellGroupSizes(cluster, cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); } - private Pair pickMostFrequentCellOnAnyUnsaturatedServer( - List> cellGroupSizesPerServer, int[] cellCounts, BalancerClusterState cluster) { - cluster.sortServersByRegionCount(); - int[][] regionsPerServer = cluster.regionsPerServer; - - Pair mostFrequentCellOnServer = Pair.newPair((short) -1, -1); - - int targetCellsPerServer = Ints.checkedCast( - (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); - int highestCellCountSoFar = Integer.MIN_VALUE; - double mostCellsReservoirRandom = -1; - - for (int serverIndex = 0; serverIndex < regionsPerServer.length; serverIndex++) { - int[] regionsForServer = regionsPerServer[serverIndex]; - Map cellsOnServer = cellGroupSizesPerServer.get(serverIndex); - - Set cellsOnThisServerAndOthers = - cellsOnServer.keySet().stream().filter(cell -> cellsOnServer.get(cell) < cellCounts[cell]) - .collect(Collectors.toSet()); - - if (cellsOnServer.keySet().size() <= targetCellsPerServer - // if we have a small cell where the entire cell is local, we MUST have at least 2 cells on this server to have - // an overall region balance, so allow us to go over the target by 1 cell - || cellsOnThisServerAndOthers.size() == 1) { - continue; - } - - List> cellsByFrequencyAsc = - cellsOnServer.entrySet().stream().sorted(Map.Entry.comparingByValue()) - .collect(Collectors.toList()); - - if (cellsByFrequencyAsc.isEmpty()) { - continue; - } - - - int probe = cellsByFrequencyAsc.size() - 1; - short mostFrequentCellTemp = -1; - int mostFrequentCellCountTemp = -1; - - do { - Map.Entry entry = cellsByFrequencyAsc.get(probe); - mostFrequentCellTemp = entry.getKey(); - mostFrequentCellCountTemp = entry.getValue(); - probe--; - } while(mostFrequentCellCountTemp == cellCounts[mostFrequentCellTemp] && probe >= 0); - - final short mostFrequentCell = mostFrequentCellTemp; - final int mostFrequentCellCount = mostFrequentCellCountTemp; - - // if we've collected all of the regions for a given cell on one server, we can't improve - if (mostFrequentCellCount == cellCounts[mostFrequentCell]) { - continue; - } - - long numServersWithMostFrequentCellNotSaturated = - cellGroupSizesPerServer.stream().filter(cellMap -> cellMap.containsKey(mostFrequentCell)) - .filter(cellMap -> cellMap.keySet().size() > 1).count(); - // if we're down to only one server unsaturated with the most frequent cell, there are no good swaps - if (numServersWithMostFrequentCellNotSaturated == 1) { - continue; - } - - if (LOG.isTraceEnabled()) { - LOG.trace("Server {} has {} regions, which have {} cells", serverIndex, - Arrays.stream(regionsForServer).boxed().sorted().collect(Collectors.toList()), - cellsOnServer.size()); - } - - // we don't know how many servers have the same cell count, so use a simplified online - // reservoir sampling approach (http://gregable.com/2007/10/reservoir-sampling.html) - if (mostFrequentCellCount > highestCellCountSoFar) { - mostFrequentCellOnServer = Pair.newPair(mostFrequentCell, serverIndex); - highestCellCountSoFar = mostFrequentCellCount; - mostCellsReservoirRandom = ThreadLocalRandom.current().nextDouble(); - } else if (mostFrequentCellCount == highestCellCountSoFar) { - double maxCellRandom = ThreadLocalRandom.current().nextDouble(); - if (maxCellRandom > mostCellsReservoirRandom) { - mostFrequentCellOnServer = Pair.newPair(mostFrequentCell, serverIndex); - mostCellsReservoirRandom = maxCellRandom; - } - } - } - - return mostFrequentCellOnServer; - } - - private Pair pickLeastFrequentCellOnMostLoadedServer( - List> cellGroupSizesPerServer, - int[] cellCounts, - BalancerClusterState cluster - ) { - int targetCellsPerServer = Ints.checkedCast( - (long) Math.ceil((double) HubSpotCellCostFunction.MAX_CELL_COUNT / cluster.numServers)); - - Optional highestLoadedServerMaybe = IntStream.range(0, cluster.numServers).boxed() - .filter(server -> cellGroupSizesPerServer.get(server).keySet().size() > targetCellsPerServer) - .max(Comparator.comparing(server -> cellGroupSizesPerServer.get(server).keySet().size())); - - if (!highestLoadedServerMaybe.isPresent()) { - return Pair.newPair((short)-1, NO_SERVER); - } - - int sampleHighestLoadedServer = highestLoadedServerMaybe.get(); - int maxCellsOnAnyServer = cellGroupSizesPerServer.get(sampleHighestLoadedServer).keySet().size(); - List maxLoadedServers = IntStream.range(0, cluster.numServers).boxed() - .filter(server -> cellGroupSizesPerServer.get(server).keySet().size() == maxCellsOnAnyServer) - .collect(Collectors.toList()); - int highestLoadedServer = maxLoadedServers.get(ThreadLocalRandom.current().nextInt(maxLoadedServers.size())); - - Map cellCountsForHighestLoadedServer = cellGroupSizesPerServer.get(highestLoadedServer); - int numCellsOnHighestLoadedServer = cellCountsForHighestLoadedServer.keySet().size(); - - if (numCellsOnHighestLoadedServer <= targetCellsPerServer + 1) { - return Pair.newPair((short) -1, -1); - } - - return Pair.newPair(pickLeastFrequentCell(cellCountsForHighestLoadedServer), highestLoadedServer); - } - - private static Map computeCellGroupSizes(BalancerClusterState cluster, - int serverIndex, int[] regionsForServer) { + private static Map computeCellGroupSizes(BalancerClusterState cluster, int[] regionsForServer) { Map cellGroupSizes = new HashMap<>(); int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index efc6e5f887d0..b7b5ad86779a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -308,20 +308,16 @@ void prepare(BalancerClusterState cluster) { int changeInOverassignedRegionCells = 0; for (short movingCell : cellsOnRegion) { // this is invoked AFTER the region has been moved - int oldServerCellCount = numRegionsForCellOnOldServer.getOrDefault(movingCell, 0) + 1; - int newServerCellCount = numRegionsForCellOnNewServer.get(movingCell); + boolean didMoveDecreaseCellsOnOldServer = !numRegionsForCellOnOldServer.containsKey(movingCell); + boolean didMoveIncreaseCellsOnNewServer = numRegionsForCellOnNewServer.get(movingCell) == 1; - if (oldServerCellCount == 1) { - if (currentCellCountOldServer > bestCaseMaxCellsPerServer) { - changeInOverassignedRegionCells--; - } + if (didMoveDecreaseCellsOnOldServer) { + changeInOverassignedRegionCells++; serverHasCell[oldServer][movingCell] = false; } - if (newServerCellCount == 0) { - if (currentCellCountNewServer > bestCaseMaxCellsPerServer) { - changeInOverassignedRegionCells++; - } + if (didMoveIncreaseCellsOnNewServer) { + changeInOverassignedRegionCells--; serverHasCell[newServer][movingCell] = true; } } From 378ad33da5cfcfdf648b393b554aa5d1d5adb772 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 26 Nov 2024 18:09:10 -0500 Subject: [PATCH 086/126] Not for inclusion --- .../master/balancer/HubSpotScratchFile.java | 146 ------------------ 1 file changed, 146 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java deleted file mode 100644 index d702b5823df4..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotScratchFile.java +++ /dev/null @@ -1,146 +0,0 @@ -package org.apache.hadoop.hbase.master.balancer; - -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.OpenOption; -import java.nio.file.Paths; -import java.nio.file.StandardOpenOption; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import com.google.common.math.Quantiles; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hbase.thirdparty.com.google.common.collect.Sets; -import org.apache.hbase.thirdparty.com.google.common.math.Stats; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.base.Charsets; -import org.apache.hbase.thirdparty.com.google.common.io.Resources; - -public class HubSpotScratchFile { - private static final Logger LOG = LoggerFactory.getLogger(HubSpotScratchFile.class); - - public static void main(String[] args) throws IOException { - BalancerClusterState original = loadCluster("cluster.json"); - BalancerClusterState state = loadCluster("cluster_partial.json"); - - HubSpotCellCostFunction func = - new HubSpotCellCostFunction(new Configuration()); - HubSpotCellBasedCandidateGenerator generator = new HubSpotCellBasedCandidateGenerator(); - - func.prepare(state); - double cost = func.cost(); - Set movedRegions = new HashSet<>(); - Set fromServers = new HashSet<>(); - Set toServers = new HashSet<>(); - Set repeatMoveRegions = new HashSet<>(); - - double lastCost = cost; - int printFrequency = 500; - int lastSnapshotAt = 10; - - for (int step = 0; step < 200_000; step++) { - if (step % printFrequency == 0) { - double costDelta = cost - lastCost; - lastCost = cost; - double costPerStep = costDelta / printFrequency; - - List size = HubSpotCellBasedCandidateGenerator.computeCellsPerRs(state); - Map quantiles = - Quantiles.scale(100).indexes(10, 20, 30, 40, 50, 60, 70, 80, 90, 100).compute(size); - - System.out.printf("Step %d --> %.2f - %d regions moved (%d more than once), %d sources, %d targets. Moving %.2f per step, cumulative %.2f drop\t\t\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t\n", - step, cost, movedRegions.size(), repeatMoveRegions.size(), fromServers.size(), toServers.size(), costPerStep, costDelta, - quantiles.get(10), quantiles.get(20),quantiles.get(30),quantiles.get(40),quantiles.get(50),quantiles.get(60),quantiles.get(70),quantiles.get(80),quantiles.get(90),quantiles.get(100)); - - if (quantiles.get(100) < lastSnapshotAt) { - lastSnapshotAt = (int) Math.ceil(quantiles.get(100)); - writeStringToFile("/Users/eszabowexler/Downloads/cluster_partial.json", HubSpotCellCostFunction.OBJECT_MAPPER.toJson(state)); - writeStringToFile(String.format("/Users/eszabowexler/Downloads/hbase_instructions_%d.txt", lastSnapshotAt), generateShellCommands(original, state)); - } - } - BalanceAction action = generator.generate(state); - if (action instanceof SwapRegionsAction) { - SwapRegionsAction swapRegionsAction = (SwapRegionsAction) action; - - if (movedRegions.contains(swapRegionsAction.getFromRegion())) { - repeatMoveRegions.add(swapRegionsAction.getFromServer()); - } - if (movedRegions.contains(swapRegionsAction.getToRegion())) { - repeatMoveRegions.add(swapRegionsAction.getToRegion()); - } - - movedRegions.add(swapRegionsAction.getFromRegion()); - movedRegions.add(swapRegionsAction.getToRegion()); - fromServers.add(swapRegionsAction.getFromServer()); - toServers.add(swapRegionsAction.getToServer()); - } - - state.doAction(action); - func.postAction(action); - cost = func.cost(); - } - - LOG.info("{}", state); - } - - private static String generateShellCommands( - BalancerClusterState original, - BalancerClusterState state - ) { - int[][] newRegionsPerServer = state.regionsPerServer; - int[][] oldRegionsPerServer = original.regionsPerServer; - - return IntStream.range(0, original.numServers) - .boxed() - .flatMap(server -> { - int[] oldRegionsRaw = oldRegionsPerServer[server]; - int[] newRegionsRaw = newRegionsPerServer[server]; - - Set oldRegions = - Arrays.stream(oldRegionsRaw).mapToObj(oldRegion -> original.regions[oldRegion]) - .map(RegionInfo::getEncodedName) - .collect(Collectors.toSet()); - Set newRegions = - Arrays.stream(newRegionsRaw).mapToObj(newRegion -> state.regions[newRegion]) - .map(RegionInfo::getEncodedName) - .collect(Collectors.toSet()); - - Sets.SetView regionsMovedToThisServer = Sets.difference(newRegions, oldRegions); - ServerName serverName = state.servers[server]; - - return regionsMovedToThisServer.stream() - .map(encodedRegionName -> String.format("move '%s', '%s'", encodedRegionName, serverName.getServerName())); - }) - .collect(Collectors.joining("\n")); - } - - private static BalancerClusterState loadCluster(String filename) throws IOException { - System.out.printf("Loading %s\n", filename); - String file = Resources.readLines(new URL("file:///Users/eszabowexler/Downloads/" + filename), Charsets.UTF_8).stream() - .collect(Collectors.joining("\n")); - BalancerClusterState state = - HubSpotCellCostFunction.OBJECT_MAPPER.fromJson(file, BalancerClusterState.class); - System.out.printf("Loaded %s!\n", filename); - return state; - } - - // function to write string to file by absolute path - public static void writeStringToFile(String path, String content) { - try { - System.out.printf("Writing %s\n", path); - Files.write(Paths.get(path), content.getBytes()); - System.out.printf("Wrote %s!\n", path); - } catch (IOException e) { - e.printStackTrace(); - } - } -} From c0895ba36aa3f3644af6e1e79e9398fdb761c82a Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 13:35:28 -0500 Subject: [PATCH 087/126] Extract static methods, simplify --- .../HubSpotCellBasedCandidateGenerator.java | 34 +- .../balancer/HubSpotCellCostFunction.java | 297 ++---------------- .../master/balancer/HubSpotCellUtilities.java | 245 +++++++++++++++ .../balancer/TestHubSpotCellCostFunction.java | 14 +- 4 files changed, 292 insertions(+), 298 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index b7c94f815737..83db8d36e94e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -55,11 +55,11 @@ cluster.regionsPerServer.length, cluster.regions.length, cluster.tables); } - int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; + int[] cellCounts = new int[HubSpotCellUtilities.MAX_CELL_COUNT]; Arrays.stream(cluster.regions) - .flatMap(region -> HubSpotCellCostFunction.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellCostFunction.MAX_CELL_COUNT).stream()) + .flatMap(region -> HubSpotCellUtilities.toCells(region.getStartKey(), region.getEndKey(), HubSpotCellUtilities.MAX_CELL_COUNT).stream()) .forEach(cellOnRegion -> cellCounts[cellOnRegion]++); - double[] cellPercents = new double[HubSpotCellCostFunction.MAX_CELL_COUNT]; + double[] cellPercents = new double[HubSpotCellUtilities.MAX_CELL_COUNT]; for (int i = 0; i < cellCounts.length; i++) { cellPercents[i] = (double) cellCounts[i] / cluster.numRegions; } @@ -80,7 +80,7 @@ private BalanceAction generateAction( (long) Math.floor((double) cluster.numRegions / cluster.numServers)); int numTimesCellRegionsFillAllServers = 0; - for (int cell = 0; cell < HubSpotCellCostFunction.MAX_CELL_COUNT; cell++) { + for (int cell = 0; cell < HubSpotCellUtilities.MAX_CELL_COUNT; cell++) { int numRegionsForCell = cellCounts[cell]; numTimesCellRegionsFillAllServers += Ints.checkedCast((long) Math.floor((double) numRegionsForCell / cluster.numServers)); } @@ -322,7 +322,7 @@ static List computeCellsPerRs(BalancerClusterState cluster) { private static Map computeCellGroupSizes(BalancerClusterState cluster, int[] regionsForServer) { Map cellGroupSizes = new HashMap<>(); - int[] cellCounts = new int[HubSpotCellCostFunction.MAX_CELL_COUNT]; + int[] cellCounts = new int[HubSpotCellUtilities.MAX_CELL_COUNT]; for (int regionIndex : regionsForServer) { if (regionIndex < 0 || regionIndex > cluster.regions.length) { @@ -352,24 +352,24 @@ private static Map computeCellGroupSizes(BalancerClusterState cl Bytes.toShort(startKey, 0, 2) : Bytes.toShort(new byte[] { 0, startKey[0] })); short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (short) (HubSpotCellUtilities.MAX_CELL_COUNT - 1) : (endKey.length >= 2 ? Bytes.toShort(endKey, 0, 2) : Bytes.toShort(new byte[] { -1, endKey[0] })); - if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; + if (startCellId < 0 || startCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { + startCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; } - if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; + if (endCellId < 0 || endCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { + endCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; } for (short i = startCellId; i < endCellId; i++) { cellCounts[i]++; } - if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { + if (HubSpotCellUtilities.isStopInclusive(endKey)) { cellCounts[endCellId]++; } } @@ -405,24 +405,24 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio Bytes.toShort(startKey, 0, 2) : Bytes.toShort(new byte[] { 0, startKey[0] })); short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellCostFunction.MAX_CELL_COUNT - 1) : + (short) (HubSpotCellUtilities.MAX_CELL_COUNT - 1) : (endKey.length >= 2 ? Bytes.toShort(endKey, 0, 2) : Bytes.toShort(new byte[] { -1, endKey[0] })); - if (startCellId < 0 || startCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - startCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; + if (startCellId < 0 || startCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { + startCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; } - if (endCellId < 0 || endCellId > HubSpotCellCostFunction.MAX_CELL_COUNT) { - endCellId = HubSpotCellCostFunction.MAX_CELL_COUNT - 1; + if (endCellId < 0 || endCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { + endCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; } for (short i = startCellId; i < endCellId; i++) { resultBuilder.put(regionIndex, i); } - if (!HubSpotCellCostFunction.isStopExclusive(endKey)) { + if (HubSpotCellUtilities.isStopInclusive(endKey)) { resultBuilder.put(regionIndex, endCellId); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index b7b5ad86779a..ed0354ab63b9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,23 +17,19 @@ */ package org.apache.hadoop.hbase.master.balancer; -import java.lang.reflect.Field; -import java.lang.reflect.Type; +import com.google.common.collect.Iterables; import java.util.Arrays; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,19 +37,7 @@ import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; -import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; -import org.apache.hbase.thirdparty.com.google.gson.ExclusionStrategy; -import org.apache.hbase.thirdparty.com.google.gson.FieldAttributes; -import org.apache.hbase.thirdparty.com.google.gson.Gson; -import org.apache.hbase.thirdparty.com.google.gson.GsonBuilder; -import org.apache.hbase.thirdparty.com.google.gson.JsonArray; -import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializationContext; -import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializer; -import org.apache.hbase.thirdparty.com.google.gson.JsonElement; -import org.apache.hbase.thirdparty.com.google.gson.JsonObject; -import org.apache.hbase.thirdparty.com.google.gson.JsonParseException; -import org.apache.hbase.thirdparty.com.google.gson.JsonSerializationContext; -import org.apache.hbase.thirdparty.com.google.gson.JsonSerializer; +import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; /** * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This @@ -68,134 +52,8 @@ public class HubSpotCellCostFunction extends CostFunction { private static final String HUBSPOT_CELL_COST_MULTIPLIER = "hbase.master.balancer.stochastic.hubspotCellCost"; - static class Int2IntCounterMapAdapter implements JsonSerializer, JsonDeserializer { - @Override public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, - JsonSerializationContext context) { - JsonObject obj = new JsonObject(); - - obj.addProperty("loadFactor", src.loadFactor()); - obj.addProperty("initialValue", src.initialValue()); - obj.addProperty("resizeThreshold", src.resizeThreshold()); - obj.addProperty("size", src.size()); - - Field entryField = null; - try { - entryField = Int2IntCounterMap.class.getDeclaredField("entries"); - } catch (NoSuchFieldException e) { - throw new RuntimeException(e); - } - entryField.setAccessible(true); - int[] entries = null; - try { - entries = (int[]) entryField.get(src); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - JsonArray entryArray = new JsonArray(entries.length); - for (int entry : entries) { - entryArray.add(entry); - } - obj.add("entries", entryArray); - - return obj; - } - - @Override public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, - JsonDeserializationContext context) throws JsonParseException { - JsonObject obj = json.getAsJsonObject(); - - float loadFactor = obj.get("loadFactor").getAsFloat(); - int initialValue = obj.get("initialValue").getAsInt(); - int resizeThreshold = obj.get("resizeThreshold").getAsInt(); - int size = obj.get("size").getAsInt(); - - JsonArray entryArray = obj.get("entries").getAsJsonArray(); - int[] entries = new int[entryArray.size()]; - - for (int i = 0; i < entryArray.size(); i++) { - entries[i] = entryArray.get(i).getAsInt(); - } - - Int2IntCounterMap result = new Int2IntCounterMap(0, loadFactor, initialValue); - - Field resizeThresholdField = null; - Field entryField = null; - Field sizeField = null; - - try { - resizeThresholdField = Int2IntCounterMap.class.getDeclaredField("resizeThreshold"); - entryField = Int2IntCounterMap.class.getDeclaredField("entries"); - sizeField = Int2IntCounterMap.class.getDeclaredField("size"); - } catch (NoSuchFieldException e) { - throw new RuntimeException(e); - } - - resizeThresholdField.setAccessible(true); - entryField.setAccessible(true); - sizeField.setAccessible(true); - - try { - resizeThresholdField.set(result, resizeThreshold); - entryField.set(result, entries); - sizeField.set(result, size); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - - return result; - } - } - - static final Gson OBJECT_MAPPER = new GsonBuilder() - .excludeFieldsWithoutExposeAnnotation() - .enableComplexMapKeySerialization() - .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) - .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { - JsonObject obj = json.getAsJsonObject(); - - boolean split = obj.get("split").getAsBoolean(); - long regionId = obj.get("regionId").getAsLong(); - int replicaId = obj.get("replicaId").getAsInt(); - JsonObject tableName = obj.get("tableName").getAsJsonObject(); - JsonArray startKey = obj.get("startKey").getAsJsonArray(); - JsonArray endKey = obj.get("endKey").getAsJsonArray(); - - byte[] startKeyBytes = new byte[startKey.size()]; - byte[] endKeyBytes = new byte[endKey.size()]; - - for (int i = 0; i < startKey.size(); i++) { - startKeyBytes[i] = startKey.get(i).getAsByte(); - } - for (int i = 0; i < endKey.size(); i++) { - endKeyBytes[i] = endKey.get(i).getAsByte(); - } - - TableName tb = TableName.valueOf( - tableName.get("namespaceAsString").getAsString(), - tableName.get("qualifierAsString").getAsString() - ); - - RegionInfo result = - RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) - .setReplicaId(replicaId).setStartKey(startKeyBytes).setEndKey(endKeyBytes).build(); - return result; - }) - .addDeserializationExclusionStrategy(new ExclusionStrategy() { - @Override public boolean shouldSkipField(FieldAttributes f) { - return f.getName().equals("serversToIndex") - || f.getName().equals("regionsToIndex") - || f.getName().equals("clusterState") - ; - } - - @Override public boolean shouldSkipClass(Class clazz) { - return false; - } - }) - .create(); private static final float DEFAULT_HUBSPOT_CELL_COST = 0; - // hack - hard code this for now - static final short MAX_CELL_COUNT = 360; + private static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); private int numServers; private short numCells; @@ -205,7 +63,6 @@ static class Int2IntCounterMapAdapter implements JsonSerializer 0 ) { try { - LOG.trace("{} cluster state:\n{}", cluster.tables, OBJECT_MAPPER.toJson(cluster)); + LOG.trace("{} cluster state:\n{}", cluster.tables, HubSpotCellUtilities.OBJECT_MAPPER.toJson(cluster)); } catch (Exception ex) { LOG.error("Failed to write cluster state", ex); } } this.serverHasCell = new boolean[numServers][numCells]; - int balancedRegionsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - this.regionCountByCell = new Int2IntCounterMap(MAX_CELL_COUNT, 0.5f, 0); + int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + this.regionCountByCell = new Int2IntCounterMap(HubSpotCellUtilities.MAX_CELL_COUNT, 0.5f, 0); Arrays.stream(cluster.regions) - .forEach(r -> toCells(r.getStartKey(), r.getEndKey(), MAX_CELL_COUNT).forEach(cell -> regionCountByCell.addAndGet((int) cell, 1))); - this.bestCaseMaxCellsPerServer = balancedRegionsPerServer; + .forEach(r -> HubSpotCellUtilities.toCells(r.getStartKey(), r.getEndKey(), HubSpotCellUtilities.MAX_CELL_COUNT).forEach(cell -> regionCountByCell.addAndGet((int) cell, 1))); int numTimesCellRegionsFillAllServers = 0; - for (int cell = 0; cell < MAX_CELL_COUNT; cell++) { + for (int cell = 0; cell < HubSpotCellUtilities.MAX_CELL_COUNT; cell++) { int numRegionsForCell = regionCountByCell.get(cell); numTimesCellRegionsFillAllServers += Ints.checkedCast((long) Math.floor((double) numRegionsForCell / numServers)); } - this.bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; + bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; this.numRegionCellsOverassigned = calculateCurrentCellCost( numCells, - numServers, - bestCaseMaxCellsPerServer, + numServers, bestCaseMaxCellsPerServer, regions, regionIndexToServerIndex, serverHasCell, super.cluster::getRegionSizeMB ); - if (regions.length > 0 + if (LOG.isTraceEnabled() + && regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") - && LOG.isTraceEnabled() ) { LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), snapshotState()); } } @Override boolean isNeeded() { - return cluster.tables.stream().anyMatch(name -> name.contains("objects-3")); + return cluster.tables.size() == 1 + && TABLES_TO_BALANCE.contains(Iterables.getOnlyElement(cluster.tables)) + && cluster.regions != null + && cluster.regions.length > 0; } @Override protected void regionMoved(int region, int oldServer, int newServer) { RegionInfo movingRegion = regions[region]; - if (!movingRegion.getTable().getNamespaceAsString().equals("default")) { - return; - } - - Set cellsOnRegion = toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + Set cellsOnRegion = HubSpotCellUtilities.toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); if (LOG.isDebugEnabled()) { LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", @@ -334,7 +188,7 @@ private Map computeCellFrequencyForServer(int server) { ImmutableMultimap.Builder regionsByCell = ImmutableMultimap.builder(); for (int regionIndex : regions) { RegionInfo region = cluster.regions[regionIndex]; - Set cellsInRegion = toCells(region.getStartKey(), region.getEndKey(), numCells); + Set cellsInRegion = HubSpotCellUtilities.toCells(region.getStartKey(), region.getEndKey(), numCells); cellsInRegion.forEach(cell -> regionsByCell.put(cell, regionIndex)); } @@ -374,7 +228,8 @@ private String snapshotState() { int regionSizeMb = super.cluster.getRegionSizeMB(i); String cellsInRegion = - toCellSetString(toCells(region.getStartKey(), region.getEndKey(), numCells)); + HubSpotCellUtilities.toCellSetString( + HubSpotCellUtilities.toCells(region.getStartKey(), region.getEndKey(), numCells)); stateString.append("\n\t").append(region.getShortNameToLog()).append("[") .append(Bytes.toHex(region.getStartKey())).append(", ") @@ -392,10 +247,6 @@ private String snapshotState() { return stateString.toString(); } - private static String toCellSetString(Set cells) { - return cells.stream().map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); - } - @Override protected double cost() { return numRegionCellsOverassigned; @@ -481,109 +332,7 @@ private static void setCellsForServer( byte[] endKey, short numCells ) { - short startCellId = (startKey == null || startKey.length == 0) - ? 0 - : (startKey.length >= 2 - ? Bytes.toShort(startKey, 0, 2) - : Bytes.toShort(new byte[] { 0, startKey[0] })); - short stopCellId = (endKey == null || endKey.length == 0) - ? (short) (numCells - 1) - : (endKey.length >= 2 - ? Bytes.toShort(endKey, 0, 2) - : Bytes.toShort(new byte[] { -1, endKey[0] })); - - if (stopCellId < 0 || stopCellId > numCells) { - stopCellId = numCells; - } - - if (startCellId == stopCellId) { - serverHasCell[startCellId] = true; - return; - } - - for (short i = startCellId; i < stopCellId; i++) { - serverHasCell[i] = true; - } - - // if everything after the cell prefix is 0, this stop key is actually exclusive - if (!isStopExclusive(endKey)) { - serverHasCell[stopCellId] = true; - } - } - - static boolean isStopExclusive(byte[] endKey) { - return endKey != null && endKey.length == 2 || (endKey.length > 2 && areSubsequentBytesAllZero(endKey, 2)); - } - - static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { - if (regionInfos == null || regionInfos.length == 0) { - return 0; - } - - Set cellsInRegions = Arrays.stream(regionInfos) - .map(region -> toCells(region.getStartKey(), region.getEndKey(), totalCellCount)) - .flatMap(Set::stream).collect(Collectors.toSet()); - return Shorts.checkedCast(cellsInRegions.size()); - } - - static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { - return range(padToTwoBytes(rawStart, (byte) 0), padToTwoBytes(rawStop, (byte) -1), numCells); - } - - private static byte[] padToTwoBytes(byte[] key, byte pad) { - if (key == null || key.length == 0) { - return new byte[] { pad, pad }; - } - - if (key.length == 1) { - return new byte[] { pad, key[0] }; - } - - return key; - } - - private static Set range(byte[] start, byte[] stop, short numCells) { - short stopCellId = toCell(stop); - if (stopCellId < 0 || stopCellId > numCells) { - stopCellId = numCells; - } - short startCellId = toCell(start); - - if (startCellId == stopCellId) { - return ImmutableSet.of(startCellId); - } - - // if everything after the cell prefix is 0, this stop key is actually exclusive - boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); - - final IntStream cellStream; - if (isStopExclusive) { - cellStream = IntStream.range(startCellId, stopCellId); - } else { - // this is inclusive, but we have to make sure we include at least the startCellId, - // even if stopCell = startCell + 1 - cellStream = IntStream.rangeClosed(startCellId, Math.max(stopCellId, startCellId + 1)); - } - - return cellStream.mapToObj(val -> (short) val).collect(Collectors.toSet()); - } - - private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { - for (int i = offset; i < stop.length; i++) { - if (stop[i] != (byte) 0) { - return false; - } - } - return true; - } - - private static short toCell(byte[] key) { - if (key == null || key.length < 2) { - throw new IllegalArgumentException( - "Key must be nonnull and at least 2 bytes long - passed " + Bytes.toHex(key)); - } - - return Bytes.toShort(key, 0, 2); + HubSpotCellUtilities.range(startKey, endKey, numCells).forEach(cellId -> serverHasCell[cellId] = true); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java new file mode 100644 index 000000000000..e6ca96d70c16 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java @@ -0,0 +1,245 @@ +package org.apache.hadoop.hbase.master.balancer; + +import org.agrona.collections.Int2IntCounterMap; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; +import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; +import org.apache.hbase.thirdparty.com.google.gson.ExclusionStrategy; +import org.apache.hbase.thirdparty.com.google.gson.FieldAttributes; +import org.apache.hbase.thirdparty.com.google.gson.Gson; +import org.apache.hbase.thirdparty.com.google.gson.GsonBuilder; +import org.apache.hbase.thirdparty.com.google.gson.JsonArray; +import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializationContext; +import org.apache.hbase.thirdparty.com.google.gson.JsonDeserializer; +import org.apache.hbase.thirdparty.com.google.gson.JsonElement; +import org.apache.hbase.thirdparty.com.google.gson.JsonObject; +import org.apache.hbase.thirdparty.com.google.gson.JsonParseException; +import org.apache.hbase.thirdparty.com.google.gson.JsonSerializationContext; +import org.apache.hbase.thirdparty.com.google.gson.JsonSerializer; +import org.apache.yetus.audience.InterfaceAudience; +import java.lang.reflect.Field; +import java.lang.reflect.Type; +import java.util.Arrays; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +@InterfaceAudience.Private +final class HubSpotCellUtilities { + // TODO: this should be dynamically configured, not hard-coded, but this dramatically simplifies the initial version + static final short MAX_CELL_COUNT = 360; + + static final Gson OBJECT_MAPPER = new GsonBuilder() + .excludeFieldsWithoutExposeAnnotation() + .enableComplexMapKeySerialization() + .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) + .registerTypeAdapter(RegionInfo.class, (JsonDeserializer) (json, typeOfT, context) -> { + JsonObject obj = json.getAsJsonObject(); + + boolean split = obj.get("split").getAsBoolean(); + long regionId = obj.get("regionId").getAsLong(); + int replicaId = obj.get("replicaId").getAsInt(); + JsonObject tableName = obj.get("tableName").getAsJsonObject(); + JsonArray startKey = obj.get("startKey").getAsJsonArray(); + JsonArray endKey = obj.get("endKey").getAsJsonArray(); + + byte[] startKeyBytes = new byte[startKey.size()]; + byte[] endKeyBytes = new byte[endKey.size()]; + + for (int i = 0; i < startKey.size(); i++) { + startKeyBytes[i] = startKey.get(i).getAsByte(); + } + for (int i = 0; i < endKey.size(); i++) { + endKeyBytes[i] = endKey.get(i).getAsByte(); + } + + TableName tb = TableName.valueOf( + tableName.get("namespaceAsString").getAsString(), + tableName.get("qualifierAsString").getAsString() + ); + + RegionInfo result = + RegionInfoBuilder.newBuilder(tb).setSplit(split).setRegionId(regionId) + .setReplicaId(replicaId).setStartKey(startKeyBytes).setEndKey(endKeyBytes).build(); + return result; + }) + .addDeserializationExclusionStrategy(new ExclusionStrategy() { + @Override public boolean shouldSkipField(FieldAttributes f) { + return f.getName().equals("serversToIndex") + || f.getName().equals("regionsToIndex") + || f.getName().equals("clusterState") + ; + } + + @Override public boolean shouldSkipClass(Class clazz) { + return false; + } + }) + .create(); + + private HubSpotCellUtilities() {} + + static String toCellSetString(Set cells) { + return cells.stream().sorted().map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); + } + + static boolean isStopInclusive(byte[] endKey) { + return (endKey == null || endKey.length != 2) && (endKey == null || endKey.length <= 2 + || !areSubsequentBytesAllZero(endKey, 2)); + } + + static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { + if (regionInfos == null || regionInfos.length == 0) { + return 0; + } + + Set cellsInRegions = Arrays.stream(regionInfos) + .map(region -> toCells(region.getStartKey(), region.getEndKey(), totalCellCount)) + .flatMap(Set::stream).collect(Collectors.toSet()); + return Shorts.checkedCast(cellsInRegions.size()); + } + + static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { + return range(padToTwoBytes(rawStart, (byte) 0), padToTwoBytes(rawStop, (byte) -1), numCells); + } + + private static byte[] padToTwoBytes(byte[] key, byte pad) { + if (key == null || key.length == 0) { + return new byte[] { pad, pad }; + } + + if (key.length == 1) { + return new byte[] { pad, key[0] }; + } + + return key; + } + + static Set range(byte[] start, byte[] stop, short numCells) { + short stopCellId = toCell(stop, (byte) -1, (short) (numCells - 1)); + if (stopCellId < 0 || stopCellId > numCells) { + stopCellId = numCells; + } + short startCellId = toCell(start, (byte) 0, (short) 0); + + if (startCellId == stopCellId) { + return ImmutableSet.of(startCellId); + } + + boolean isStopExclusive = areSubsequentBytesAllZero(stop, 2); + + final IntStream cellStream; + if (isStopExclusive) { + cellStream = IntStream.range(startCellId, stopCellId); + } else { + int stopCellIdForcedToIncludeStart = Math.max(stopCellId, startCellId + 1); + cellStream = IntStream.rangeClosed(startCellId, stopCellIdForcedToIncludeStart); + } + + return cellStream.mapToObj(val -> (short) val).collect(Collectors.toSet()); + } + + private static boolean areSubsequentBytesAllZero(byte[] stop, int offset) { + for (int i = offset; i < stop.length; i++) { + if (stop[i] != (byte) 0) { + return false; + } + } + return true; + } + + private static short toCell(byte[] key, byte pad, short ifAbsent) { + if (key == null) { + throw new IllegalArgumentException( + "Key must be nonnull"); + } + + return key.length == 0 + ? ifAbsent + : (key.length >= 2 + ? Bytes.toShort(key, 0, 2) + : Bytes.toShort(new byte[] { pad, key[0] })); + } + + static class Int2IntCounterMapAdapter implements JsonSerializer, + JsonDeserializer { + @Override public JsonElement serialize(Int2IntCounterMap src, Type typeOfSrc, + JsonSerializationContext context) { + JsonObject obj = new JsonObject(); + + obj.addProperty("loadFactor", src.loadFactor()); + obj.addProperty("initialValue", src.initialValue()); + obj.addProperty("resizeThreshold", src.resizeThreshold()); + obj.addProperty("size", src.size()); + + Field entryField = null; + try { + entryField = Int2IntCounterMap.class.getDeclaredField("entries"); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + entryField.setAccessible(true); + int[] entries = null; + try { + entries = (int[]) entryField.get(src); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + JsonArray entryArray = new JsonArray(entries.length); + for (int entry : entries) { + entryArray.add(entry); + } + obj.add("entries", entryArray); + + return obj; + } + + @Override public Int2IntCounterMap deserialize(JsonElement json, Type typeOfT, + JsonDeserializationContext context) throws JsonParseException { + JsonObject obj = json.getAsJsonObject(); + + float loadFactor = obj.get("loadFactor").getAsFloat(); + int initialValue = obj.get("initialValue").getAsInt(); + int resizeThreshold = obj.get("resizeThreshold").getAsInt(); + int size = obj.get("size").getAsInt(); + + JsonArray entryArray = obj.get("entries").getAsJsonArray(); + int[] entries = new int[entryArray.size()]; + + for (int i = 0; i < entryArray.size(); i++) { + entries[i] = entryArray.get(i).getAsInt(); + } + + Int2IntCounterMap result = new Int2IntCounterMap(0, loadFactor, initialValue); + + Field resizeThresholdField = null; + Field entryField = null; + Field sizeField = null; + + try { + resizeThresholdField = Int2IntCounterMap.class.getDeclaredField("resizeThreshold"); + entryField = Int2IntCounterMap.class.getDeclaredField("entries"); + sizeField = Int2IntCounterMap.class.getDeclaredField("size"); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + + resizeThresholdField.setAccessible(true); + entryField.setAccessible(true); + sizeField.setAccessible(true); + + try { + resizeThresholdField.set(result, resizeThreshold); + entryField.set(result, entries); + sizeField.set(result, size); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + + return result; + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index fa9f358883ec..763909cf8cc0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -44,7 +44,7 @@ public class TestHubSpotCellCostFunction { @Test public void testCellCountTypical() { int numCells = - HubSpotCellCostFunction.calcNumCells( + HubSpotCellUtilities.calcNumCells( new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3) }, (short) 3); @@ -53,7 +53,7 @@ public void testCellCountTypical() { @Test public void testCellCountMultipleInRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + int numCells = HubSpotCellUtilities.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 4), buildRegionInfo((short) 4, (short) 5) }, (short) 5); assertEquals(5, numCells); @@ -61,7 +61,7 @@ public void testCellCountMultipleInRegion() { @Test public void testCellCountMultipleInLastRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + int numCells = HubSpotCellUtilities.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 5) }, (short) 5); assertEquals(5, numCells); @@ -69,7 +69,7 @@ public void testCellCountMultipleInLastRegion() { @Test public void testCellCountMultipleInFirstRegion() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + int numCells = HubSpotCellUtilities.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 4), buildRegionInfo((short) 4, (short) 5) }, (short) 5); assertEquals(5, numCells); @@ -77,7 +77,7 @@ public void testCellCountMultipleInFirstRegion() { @Test public void testCellCountLastKeyNull() { - int numCells = HubSpotCellCostFunction.calcNumCells(new RegionInfo[] { + int numCells = HubSpotCellUtilities.calcNumCells(new RegionInfo[] { buildRegionInfo((short) 0, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, (short) 4); assertEquals(4, numCells); @@ -86,7 +86,7 @@ public void testCellCountLastKeyNull() { @Test public void testCellCountFirstKeyNull() { int numCells = - HubSpotCellCostFunction.calcNumCells( + HubSpotCellUtilities.calcNumCells( new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, (short) 4) }, (short) 4); @@ -95,7 +95,7 @@ public void testCellCountFirstKeyNull() { @Test public void testCellCountBothEndsNull() { - int numCells = HubSpotCellCostFunction.calcNumCells( + int numCells = HubSpotCellUtilities.calcNumCells( new RegionInfo[] { buildRegionInfo(null, (short) 1), buildRegionInfo((short) 1, (short) 2), buildRegionInfo((short) 2, (short) 3), buildRegionInfo((short) 3, null) }, (short) 4); From 5ba25ae742c24187dbc352b590339bdc9141b710 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 13:44:28 -0500 Subject: [PATCH 088/126] Clean up the candidate generator --- .../HubSpotCellBasedCandidateGenerator.java | 168 ++++++------------ .../balancer/HubSpotCellCostFunction.java | 4 +- .../master/balancer/HubSpotCellUtilities.java | 2 + 3 files changed, 57 insertions(+), 117 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 83db8d36e94e..a413938c6dfe 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -27,9 +27,7 @@ import java.util.Set; import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -45,7 +43,7 @@ LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); @Override BalanceAction generate(BalancerClusterState cluster) { - if (cluster.tables.stream().noneMatch(name -> name.contains("objects-3"))) { + if (cluster.tables.stream().noneMatch(HubSpotCellUtilities.TABLES_TO_BALANCE::contains)) { return BalanceAction.NULL_ACTION; } @@ -65,8 +63,7 @@ } List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); + Arrays.stream(cluster.regionsPerServer).map(regionsForServer -> computeCellGroupSizes(cluster, regionsForServer)).collect(Collectors.toList()); return generateAction(cluster, cellCounts, cellGroupSizesPerServer); } @@ -92,10 +89,51 @@ private BalanceAction generateAction( return moveRegionToUnderloadedServer; } - return swapRegionsToIncreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); } + private BalanceAction tryMoveRegionToSomeUnderloadedServer( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int targetRegionsPerServer + ) { + Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer); + if (!toServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + + int toServer = toServerMaybe.get(); + Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer); + if (!fromServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + int fromServer = fromServerMaybe.get(); + short cell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); + + return moveCell("fill underloaded", fromServer, cell, toServer, cellGroupSizesPerServer, cluster); + } + + private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + for (int server = 0; server < cluster.numServers; server++) { + if (cluster.regionsPerServer[server].length > targetRegionsPerServer) { + return Optional.of(server); + } + } + + return Optional.empty(); + } + + private Optional pickUnderloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + for (int server = 0; server < cluster.numServers; server++) { + if (cluster.regionsPerServer[server].length < targetRegionsPerServer) { + return Optional.of(server); + } + } + + return Optional.empty(); + } + private BalanceAction swapRegionsToIncreaseDistinctCellsPerServer( BalancerClusterState cluster, int[] cellCounts, @@ -130,6 +168,8 @@ private Optional> pickCellOnServerNotPresentOnSource( Map countsForFromServer = cellGroupSizesPerServer.get(fromServer); Optional> result = Optional.empty(); + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html double reservoirRandom = -1; for (int server = 0; server < cluster.numServers; server++) { if (server == fromServer) { @@ -165,9 +205,12 @@ private Optional pickServerWithoutEnoughIsolation( List> cellGroupSizesPerServer, int targetCellsPerServer ) { + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html Optional result = Optional.empty(); int lowestSoFar = Integer.MAX_VALUE; double reservoirRandom = -1; + for (int server = 0; server < cluster.numServers; server++) { int numCellsOnServer = cellGroupSizesPerServer.get(server).keySet().size(); if (numCellsOnServer < targetCellsPerServer) { @@ -188,28 +231,6 @@ private Optional pickServerWithoutEnoughIsolation( return result; } - private BalanceAction tryMoveRegionToSomeUnderloadedServer( - BalancerClusterState cluster, - int[] cellCounts, - List> cellGroupSizesPerServer, - int targetRegionsPerServer - ) { - Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer); - if (!toServerMaybe.isPresent()) { - return BalanceAction.NULL_ACTION; - } - - int toServer = toServerMaybe.get(); - Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer); - if (!fromServerMaybe.isPresent()) { - return BalanceAction.NULL_ACTION; - } - int fromServer = fromServerMaybe.get(); - short cell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); - - return moveCell("fill underloaded", fromServer, cell, toServer, cellGroupSizesPerServer, cluster); - } - private short pickCellToMove(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { return cellCountsForServer.keySet().stream() .max(Comparator.comparing(cell -> { @@ -221,26 +242,6 @@ private short pickCellToMove(BalancerClusterState cluster, int[] cellCounts, Map .get(); } - private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { - for (int server = 0; server < cluster.numServers; server++) { - if (cluster.regionsPerServer[server].length > targetRegionsPerServer) { - return Optional.of(server); - } - } - - return Optional.empty(); - } - - private Optional pickUnderloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { - for (int server = 0; server < cluster.numServers; server++) { - if (cluster.regionsPerServer[server].length < targetRegionsPerServer) { - return Optional.of(server); - } - } - - return Optional.empty(); - } - private MoveRegionAction moveCell( String originStep, int fromServer, short fromCell, @@ -310,14 +311,7 @@ private int pickRegionForCell(Multimap cellsByRegionOnServer, sh return cellsByRegionOnServer.keySet().stream() .filter(region -> cellsByRegionOnServer.get(region).contains(cellToMove)) .min(Comparator.comparingInt(region -> cellsByRegionOnServer.get(region).size())) - .orElseGet(() -> -1); - } - - static List computeCellsPerRs(BalancerClusterState cluster) { - List> cellGroupSizesPerServer = - IntStream.range(0, cluster.regionsPerServer.length).mapToObj( - serverIndex -> computeCellGroupSizes(cluster, cluster.regionsPerServer[serverIndex])).collect(Collectors.toList()); - return cellGroupSizesPerServer.stream().map(Map::size).collect(Collectors.toList()); + .orElseGet(() -> NO_REGION); } private static Map computeCellGroupSizes(BalancerClusterState cluster, int[] regionsForServer) { @@ -343,35 +337,8 @@ private static Map computeCellGroupSizes(BalancerClusterState cl continue; } - byte[] startKey = region.getStartKey(); - byte[] endKey = region.getEndKey(); - - short startCellId = (startKey == null || startKey.length == 0) ? - 0 : - (startKey.length >= 2 ? - Bytes.toShort(startKey, 0, 2) : - Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellUtilities.MAX_CELL_COUNT - 1) : - (endKey.length >= 2 ? - Bytes.toShort(endKey, 0, 2) : - Bytes.toShort(new byte[] { -1, endKey[0] })); - - if (startCellId < 0 || startCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { - startCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; - } - - if (endCellId < 0 || endCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { - endCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; - } - - for (short i = startCellId; i < endCellId; i++) { - cellCounts[i]++; - } - - if (HubSpotCellUtilities.isStopInclusive(endKey)) { - cellCounts[endCellId]++; - } + HubSpotCellUtilities.range(region.getStartKey(), region.getEndKey(), HubSpotCellUtilities.MAX_CELL_COUNT) + .forEach(cell -> cellCounts[cell]++); } for (short c = 0; c < cellCounts.length; c++) { @@ -396,35 +363,8 @@ private Multimap computeCellsByRegion(int[] regionIndices, Regio continue; } - byte[] startKey = region.getStartKey(); - byte[] endKey = region.getEndKey(); - - short startCellId = (startKey == null || startKey.length == 0) ? - 0 : - (startKey.length >= 2 ? - Bytes.toShort(startKey, 0, 2) : - Bytes.toShort(new byte[] { 0, startKey[0] })); - short endCellId = (endKey == null || endKey.length == 0) ? - (short) (HubSpotCellUtilities.MAX_CELL_COUNT - 1) : - (endKey.length >= 2 ? - Bytes.toShort(endKey, 0, 2) : - Bytes.toShort(new byte[] { -1, endKey[0] })); - - if (startCellId < 0 || startCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { - startCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; - } - - if (endCellId < 0 || endCellId > HubSpotCellUtilities.MAX_CELL_COUNT) { - endCellId = HubSpotCellUtilities.MAX_CELL_COUNT - 1; - } - - for (short i = startCellId; i < endCellId; i++) { - resultBuilder.put(regionIndex, i); - } - - if (HubSpotCellUtilities.isStopInclusive(endKey)) { - resultBuilder.put(regionIndex, endCellId); - } + HubSpotCellUtilities.range(region.getStartKey(), region.getEndKey(), HubSpotCellUtilities.MAX_CELL_COUNT) + .forEach(cell -> resultBuilder.put(regionIndex, cell)); } return resultBuilder.build(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index ed0354ab63b9..d7963dd23c08 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -36,7 +36,6 @@ import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; -import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; /** @@ -53,7 +52,6 @@ public class HubSpotCellCostFunction extends CostFunction { "hbase.master.balancer.stochastic.hubspotCellCost"; private static final float DEFAULT_HUBSPOT_CELL_COST = 0; - private static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); private int numServers; private short numCells; @@ -122,7 +120,7 @@ && isNeeded() @Override boolean isNeeded() { return cluster.tables.size() == 1 - && TABLES_TO_BALANCE.contains(Iterables.getOnlyElement(cluster.tables)) + && HubSpotCellUtilities.TABLES_TO_BALANCE.contains(Iterables.getOnlyElement(cluster.tables)) && cluster.regions != null && cluster.regions.length > 0; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java index e6ca96d70c16..d4bf810d45bc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java @@ -80,6 +80,8 @@ final class HubSpotCellUtilities { }) .create(); + static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); + private HubSpotCellUtilities() {} static String toCellSetString(Set cells) { From 3bd6efb74a5eb31bfb30105e65c9aa451d56b15f Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 13:48:13 -0500 Subject: [PATCH 089/126] Elevate to higher package so normalizer can share common cell ops --- .../HubSpotCellUtilities.java | 22 +++++++++---------- .../HubSpotCellBasedCandidateGenerator.java | 1 + .../balancer/HubSpotCellCostFunction.java | 3 ++- .../balancer/TestHubSpotCellCostFunction.java | 1 + 4 files changed, 15 insertions(+), 12 deletions(-) rename hbase-server/src/main/java/org/apache/hadoop/hbase/{master/balancer => hubspot}/HubSpotCellUtilities.java (92%) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java similarity index 92% rename from hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java rename to hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index d4bf810d45bc..aafa4ce7d022 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -1,4 +1,4 @@ -package org.apache.hadoop.hbase.master.balancer; +package org.apache.hadoop.hbase.hubspot; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.hbase.TableName; @@ -28,11 +28,11 @@ import java.util.stream.IntStream; @InterfaceAudience.Private -final class HubSpotCellUtilities { +public final class HubSpotCellUtilities { // TODO: this should be dynamically configured, not hard-coded, but this dramatically simplifies the initial version - static final short MAX_CELL_COUNT = 360; + public static final short MAX_CELL_COUNT = 360; - static final Gson OBJECT_MAPPER = new GsonBuilder() + public static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation() .enableComplexMapKeySerialization() .registerTypeAdapter(Int2IntCounterMap.class, new Int2IntCounterMapAdapter()) @@ -80,20 +80,20 @@ final class HubSpotCellUtilities { }) .create(); - static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); + public static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); private HubSpotCellUtilities() {} - static String toCellSetString(Set cells) { + public static String toCellSetString(Set cells) { return cells.stream().sorted().map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); } - static boolean isStopInclusive(byte[] endKey) { + public static boolean isStopInclusive(byte[] endKey) { return (endKey == null || endKey.length != 2) && (endKey == null || endKey.length <= 2 || !areSubsequentBytesAllZero(endKey, 2)); } - static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { + public static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { if (regionInfos == null || regionInfos.length == 0) { return 0; } @@ -104,11 +104,11 @@ static short calcNumCells(RegionInfo[] regionInfos, short totalCellCount) { return Shorts.checkedCast(cellsInRegions.size()); } - static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { + public static Set toCells(byte[] rawStart, byte[] rawStop, short numCells) { return range(padToTwoBytes(rawStart, (byte) 0), padToTwoBytes(rawStop, (byte) -1), numCells); } - private static byte[] padToTwoBytes(byte[] key, byte pad) { + public static byte[] padToTwoBytes(byte[] key, byte pad) { if (key == null || key.length == 0) { return new byte[] { pad, pad }; } @@ -120,7 +120,7 @@ private static byte[] padToTwoBytes(byte[] key, byte pad) { return key; } - static Set range(byte[] start, byte[] stop, short numCells) { + public static Set range(byte[] start, byte[] stop, short numCells) { short stopCellId = toCell(stop, (byte) -1, (short) (numCells - 1)); if (stopCellId < 0 || stopCellId > numCells) { stopCellId = numCells; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index a413938c6dfe..59b192af95a3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -28,6 +28,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.Collectors; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.util.Pair; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index d7963dd23c08..3daf70243291 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -39,7 +40,7 @@ import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; /** - * HubSpot addition: Cost function for balancing regions based on their (reversed) cell prefix. This + * HubSpot addition: Cost function for balancing regions based on their cell prefix. This * should not be upstreamed, and our upstream solution should instead focus on introduction of * balancer conditionals; see * HBASE-28513 diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index 763909cf8cc0..d1e0821cc14f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; From 61b89c5017aec21960fb09139262beea27234c46 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 13:55:23 -0500 Subject: [PATCH 090/126] Clean up + normalize cost --- .../master/balancer/HubSpotCellCostFunction.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 3daf70243291..09485a0cea0f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -63,6 +63,7 @@ public class HubSpotCellCostFunction extends CostFunction { private boolean[][] serverHasCell; private Int2IntCounterMap regionCountByCell; private int numRegionCellsOverassigned; + private double cost; HubSpotCellCostFunction(Configuration conf) { this.setMultiplier(conf.getFloat(HUBSPOT_CELL_COST_MULTIPLIER, DEFAULT_HUBSPOT_CELL_COST)); @@ -110,6 +111,7 @@ && isNeeded() serverHasCell, super.cluster::getRegionSizeMB ); + this.cost = (double) this.numRegionCellsOverassigned / (bestCaseMaxCellsPerServer * cluster.numServers); if (LOG.isTraceEnabled() && regions.length > 0 @@ -180,6 +182,9 @@ && isNeeded() } numRegionCellsOverassigned += changeInOverassignedRegionCells; + + int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + this.cost = (double) this.numRegionCellsOverassigned / (bestCaseMaxCellsPerServer * cluster.numServers); } private Map computeCellFrequencyForServer(int server) { @@ -248,7 +253,7 @@ private String snapshotState() { @Override protected double cost() { - return numRegionCellsOverassigned; + return cost; } static int calculateCurrentCellCost( @@ -260,7 +265,6 @@ static int calculateCurrentCellCost( boolean[][] serverHasCell, Function getRegionSizeMbFunc ) { - Preconditions.checkState(bestCaseMaxCellsPerServer > 0, "Best case max cells per server must be > 0"); @@ -272,14 +276,6 @@ static int calculateCurrentCellCost( tableAndNamespace); } - if (regions.length > 0 && !regions[0].getTable().getNamespaceAsString().equals("default")) { - if (LOG.isTraceEnabled()) { - LOG.trace("Skipping cost calculation for non-default namespace on {}", - regions[0].getTable().getNameWithNamespaceInclAsString()); - } - return 0; - } - for (int i = 0; i < regions.length; i++) { if (regions[i] == null) { throw new IllegalStateException("No region available at index " + i); From 66b2adfe2e970ee74973fd174ef3d2da40d4b270 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 14:14:06 -0500 Subject: [PATCH 091/126] Update the normalizer to avoid merging across cell lines --- .../hbase/hubspot/HubSpotCellUtilities.java | 6 +- .../HubSpotCellBasedCandidateGenerator.java | 2 +- .../balancer/HubSpotCellCostFunction.java | 2 +- .../HubspotCellAwareNormalizer.java | 78 ------------------- .../normalizer/RegionNormalizerFactory.java | 13 ++-- .../normalizer/SimpleRegionNormalizer.java | 19 +++++ 6 files changed, 32 insertions(+), 88 deletions(-) delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index aafa4ce7d022..803908261367 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -80,7 +80,7 @@ public final class HubSpotCellUtilities { }) .create(); - public static final ImmutableSet TABLES_TO_BALANCE = ImmutableSet.of("objects-3"); + public static final ImmutableSet CELL_AWARE_TABLES = ImmutableSet.of("objects-3"); private HubSpotCellUtilities() {} @@ -120,6 +120,10 @@ public static byte[] padToTwoBytes(byte[] key, byte pad) { return key; } + public static Set range(byte[] start, byte[] stop) { + return range(start, stop, MAX_CELL_COUNT); + } + public static Set range(byte[] start, byte[] stop, short numCells) { short stopCellId = toCell(stop, (byte) -1, (short) (numCells - 1)); if (stopCellId < 0 || stopCellId > numCells) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 59b192af95a3..63217a3eb64f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -44,7 +44,7 @@ LoggerFactory.getLogger(HubSpotCellBasedCandidateGenerator.class); @Override BalanceAction generate(BalancerClusterState cluster) { - if (cluster.tables.stream().noneMatch(HubSpotCellUtilities.TABLES_TO_BALANCE::contains)) { + if (cluster.tables.stream().noneMatch(HubSpotCellUtilities.CELL_AWARE_TABLES::contains)) { return BalanceAction.NULL_ACTION; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 09485a0cea0f..960741ec6f81 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -123,7 +123,7 @@ && isNeeded() @Override boolean isNeeded() { return cluster.tables.size() == 1 - && HubSpotCellUtilities.TABLES_TO_BALANCE.contains(Iterables.getOnlyElement(cluster.tables)) + && HubSpotCellUtilities.CELL_AWARE_TABLES.contains(Iterables.getOnlyElement(cluster.tables)) && cluster.regions != null && cluster.regions.length > 0; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java deleted file mode 100644 index e73b979356d8..000000000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/HubspotCellAwareNormalizer.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.normalizer; - -import java.util.ArrayList; -import java.util.List; -import org.apache.hadoop.hbase.client.TableDescriptor; -import org.apache.hadoop.hbase.util.Bytes; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -final class HubspotCellAwareNormalizer extends SimpleRegionNormalizer { - private static final Logger LOG = LoggerFactory.getLogger(HubspotCellAwareNormalizer.class); - - @Override - public List computePlansForTable(TableDescriptor tableDescriptor) { - List allPlans = super.computePlansForTable(tableDescriptor); - List filteredPlans = new ArrayList<>(allPlans.size()); - - for (NormalizationPlan plan : allPlans) { - boolean shouldInclude = shouldIncludePlan(plan); - if (shouldInclude) { - filteredPlans.add(plan); - } else { - LOG.info("Skipping plan: {}", plan); - } - } - - return filteredPlans; - } - - private static boolean shouldIncludePlan(NormalizationPlan plan) { - switch (plan.getType()) { - case MERGE: - return shouldIncludeMergePlan((MergeNormalizationPlan) plan); - case NONE: - case SPLIT: - return true; - default: - throw new RuntimeException("Unknown plan type: " + plan.getType()); - } - } - - private static boolean shouldIncludeMergePlan(MergeNormalizationPlan plan) { - List targets = plan.getNormalizationTargets(); - - if (targets.size() <= 1) { - return true; - } - - byte[] endKey = targets.get(0).getRegionInfo().getEndKey(); - short cell = Bytes.toShort(endKey); - - for (int i = 1; i < targets.size(); ++i) { - endKey = targets.get(i).getRegionInfo().getEndKey(); - if (cell != Bytes.toShort(endKey)) { - return false; - } - } - - return true; - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index fb071b5438fa..7e5d7a689c7e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hbase.master.normalizer; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.yetus.audience.InterfaceAudience; @@ -56,13 +58,10 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio */ private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer - // Class balancerKlass = - // conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - // RegionNormalizer.class); - // return ReflectionUtils.newInstance(balancerKlass, conf); - - // HACK - return new HubspotCellAwareNormalizer(); + Class balancerKlass = + conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + RegionNormalizer.class); + return ReflectionUtils.newInstance(balancerKlass, conf); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java index a0c296de88f4..d1c1685388ae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/SimpleRegionNormalizer.java @@ -25,9 +25,11 @@ import java.time.Period; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.function.BooleanSupplier; import java.util.function.Function; import org.apache.hadoop.conf.Configuration; @@ -41,10 +43,12 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.conf.ConfigurationObserver; +import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hbase.thirdparty.com.google.common.collect.Sets; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -352,6 +356,8 @@ private List computeMergeNormalizationPlans(final NormalizeCo return Collections.emptyList(); } + // HubSpot addition: is table cellularized + final boolean isCellAwareTable = HubSpotCellUtilities.CELL_AWARE_TABLES.contains(ctx.tableName.getNameAsString()); final long avgRegionSizeMb = (long) ctx.getAverageRegionSizeMb(); if (avgRegionSizeMb < configuration.getMergeMinRegionSizeMb(ctx)) { return Collections.emptyList(); @@ -373,6 +379,8 @@ private List computeMergeNormalizationPlans(final NormalizeCo // walk the region chain looking for contiguous sequences of regions that can be merged. rangeMembers.clear(); sumRangeMembersSizeMb = 0; + // HubSpot addition + Set cellsInRange = new HashSet<>(); for (current = rangeStart; current < ctx.getTableRegions().size(); current++) { final RegionInfo regionInfo = ctx.getTableRegions().get(current); final long regionSizeMb = getRegionSizeMB(regionInfo); @@ -395,6 +403,17 @@ private List computeMergeNormalizationPlans(final NormalizeCo // to the range when // there's capacity // remaining. + // HubSpot addition: for cell aware tables, don't merge across cell lines + if (isCellAwareTable) { + Set regionCells = + HubSpotCellUtilities.range(regionInfo.getStartKey(), regionInfo.getEndKey()); + if (cellsInRange.isEmpty()) { + cellsInRange.addAll(regionCells); + } else if (!Sets.difference(regionCells, cellsInRange).isEmpty()) { + // region contains cells not contained in current range, not mergable - back to outer loop + break; + } + } rangeMembers.add(new NormalizationTarget(regionInfo, regionSizeMb)); sumRangeMembersSizeMb += regionSizeMb; continue; From ddb30176573e17e79d1226cfdb7ff31efef42bc0 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 14:15:22 -0500 Subject: [PATCH 092/126] Mark addition --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index c7f872fc0844..326b309316cf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -148,6 +148,7 @@ public enum GeneratorType { LOAD, LOCALITY, RACK, + // HubSpot addition HUBSPOT_CELL } From bf9fe2837b05d6aac59481fbfabdc48f5010a3c9 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 14:17:16 -0500 Subject: [PATCH 093/126] Finish clean up --- .../hbase/master/balancer/StochasticLoadBalancer.java | 1 + .../hbase/master/normalizer/RegionNormalizerFactory.java | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 326b309316cf..bc7107317045 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -235,6 +235,7 @@ protected List createCandidateGenerators() { candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), new RegionReplicaRackCandidateGenerator()); + // HubSpot addition candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); return candidateGenerators; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index 7e5d7a689c7e..04241ed04f69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -21,9 +21,9 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.yetus.audience.InterfaceAudience; /** @@ -58,10 +58,10 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio */ private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer - Class balancerKlass = + Class balancerKlass = conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, RegionNormalizer.class); - return ReflectionUtils.newInstance(balancerKlass, conf); + return ReflectionUtils.newInstance(balancerKlass, conf); } } From 6044fc7f3fcd40958e0655f703b6ec45fc3a84b4 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 14:19:02 -0500 Subject: [PATCH 094/126] Fix import --- .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 960741ec6f81..f67d102aaa8c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import com.google.common.collect.Iterables; import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -31,6 +30,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; From edc89ffc17283e892fb1b9b5b7f8c2c6aaf68ed8 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 17:08:55 -0500 Subject: [PATCH 095/126] Print error if cell id is out of bounds --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index f67d102aaa8c..66cb2f2c54dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -327,7 +327,12 @@ private static void setCellsForServer( byte[] endKey, short numCells ) { - HubSpotCellUtilities.range(startKey, endKey, numCells).forEach(cellId -> serverHasCell[cellId] = true); + HubSpotCellUtilities.range(startKey, endKey, numCells) + .forEach(cellId -> { + Preconditions.checkState(0 <= cellId && cellId < numCells, + "Cell ID %d is out of bounds - failed to compute for [%s, %s)", cellId, Bytes.toHex(startKey), Bytes.toHex(endKey)); + serverHasCell[cellId] = true; + }); } @Override From 6a7511b27d0a6c6f263ca23848acafeb6afd1b41 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 17:12:00 -0500 Subject: [PATCH 096/126] Move this up --- .../balancer/HubSpotCellCostFunction.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 66cb2f2c54dc..889610865178 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -71,12 +71,19 @@ public class HubSpotCellCostFunction extends CostFunction { @Override void prepare(BalancerClusterState cluster) { + super.prepare(cluster); + if (!isNeeded(cluster)) { + if (LOG.isTraceEnabled()) { + LOG.trace("HubSpotCellCostFunction is not needed for {}", cluster.tables); + } + return; + } + numServers = cluster.numServers; numCells = HubSpotCellUtilities.calcNumCells(cluster.regions, HubSpotCellUtilities.MAX_CELL_COUNT); regions = cluster.regions; regionIndexToServerIndex = cluster.regionIndexToServerIndex; servers = cluster.servers; - super.prepare(cluster); if (LOG.isTraceEnabled() && isNeeded() @@ -122,10 +129,14 @@ && isNeeded() } @Override boolean isNeeded() { - return cluster.tables.size() == 1 - && HubSpotCellUtilities.CELL_AWARE_TABLES.contains(Iterables.getOnlyElement(cluster.tables)) - && cluster.regions != null - && cluster.regions.length > 0; + return isNeeded(cluster); + } + + private boolean isNeeded(BalancerClusterState currentClusterState) { + return currentClusterState.tables.size() == 1 + && HubSpotCellUtilities.CELL_AWARE_TABLES.contains(Iterables.getOnlyElement(currentClusterState.tables)) + && currentClusterState.regions != null + && currentClusterState.regions.length > 0; } @Override protected void regionMoved(int region, int oldServer, int newServer) { From ff1ef1f8b28dbaba0233b7925c9eb2ee57d18270 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 2 Dec 2024 17:56:35 -0500 Subject: [PATCH 097/126] Improve debug output --- .../master/balancer/HubSpotCellCostFunction.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 889610865178..d37dbba19ca9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -120,11 +120,14 @@ && isNeeded() ); this.cost = (double) this.numRegionCellsOverassigned / (bestCaseMaxCellsPerServer * cluster.numServers); - if (LOG.isTraceEnabled() - && regions.length > 0 + if (regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") ) { - LOG.trace("Evaluated (cost={}) {}", String.format("%d", numRegionCellsOverassigned), snapshotState()); + if (LOG.isDebugEnabled()) { + LOG.debug("Evaluated (cost={})", String.format("%.4f", this.cost)); + } else if (LOG.isTraceEnabled()) { + LOG.trace("Evaluated (cost={}) {}", String.format("%.4f", this.cost), snapshotState()); + } } } @@ -326,7 +329,7 @@ static int calculateCurrentCellCost( if (LOG.isDebugEnabled()) { debugBuilder.append("]"); - LOG.debug("Cost {} from {}", cost, debugBuilder); + LOG.debug("Unweighted cost {} from {}", cost, debugBuilder); } return cost; From e0695c07b49cc5108f893ef7cfb18b66bfcdefcf Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 3 Dec 2024 16:03:50 -0500 Subject: [PATCH 098/126] Cap max cells per RS to be 10% of all cells --- .../org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java | 1 + .../master/balancer/HubSpotCellBasedCandidateGenerator.java | 2 ++ .../hadoop/hbase/master/balancer/HubSpotCellCostFunction.java | 2 ++ 3 files changed, 5 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index 803908261367..134617040c61 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -31,6 +31,7 @@ public final class HubSpotCellUtilities { // TODO: this should be dynamically configured, not hard-coded, but this dramatically simplifies the initial version public static final short MAX_CELL_COUNT = 360; + public static final int MAX_CELLS_PER_RS = 36; public static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation() diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 63217a3eb64f..9df256d96fb7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -84,6 +84,8 @@ private BalanceAction generateAction( } int targetCellsPerServer = targetRegionsPerServer - numTimesCellRegionsFillAllServers; + targetCellsPerServer = Math.min(targetCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index d37dbba19ca9..a21d9f11d10d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -99,6 +99,8 @@ && isNeeded() this.serverHasCell = new boolean[numServers][numCells]; int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); + bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + this.regionCountByCell = new Int2IntCounterMap(HubSpotCellUtilities.MAX_CELL_COUNT, 0.5f, 0); Arrays.stream(cluster.regions) .forEach(r -> HubSpotCellUtilities.toCells(r.getStartKey(), r.getEndKey(), HubSpotCellUtilities.MAX_CELL_COUNT).forEach(cell -> regionCountByCell.addAndGet((int) cell, 1))); From e6a9d7c1d9e2384e93851dda88552f3fbfdafd4f Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 3 Dec 2024 17:06:58 -0500 Subject: [PATCH 099/126] Do not install unless multiplier is positive --- .../master/balancer/StochasticLoadBalancer.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index bc7107317045..9fde32db6926 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -229,14 +229,19 @@ List getCandidateGenerators() { } protected List createCandidateGenerators() { - List candidateGenerators = new ArrayList(5); + // HubSpot addition + int numGenerators = cellCostFunction.getMultiplier() > 0 ? 5 : 4; + List candidateGenerators = new ArrayList(numGenerators); candidateGenerators.add(GeneratorType.RANDOM.ordinal(), new RandomCandidateGenerator()); candidateGenerators.add(GeneratorType.LOAD.ordinal(), new LoadCandidateGenerator()); candidateGenerators.add(GeneratorType.LOCALITY.ordinal(), localityCandidateGenerator); candidateGenerators.add(GeneratorType.RACK.ordinal(), new RegionReplicaRackCandidateGenerator()); // HubSpot addition - candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), new HubSpotCellBasedCandidateGenerator()); + if (cellCostFunction.getMultiplier() > 0) { + candidateGenerators.add(GeneratorType.HUBSPOT_CELL.ordinal(), + new HubSpotCellBasedCandidateGenerator()); + } return candidateGenerators; } @@ -254,6 +259,8 @@ protected void loadConf(Configuration conf) { localityCost = new ServerLocalityCostFunction(conf); rackLocalityCost = new RackLocalityCostFunction(conf); + // HubSpot addition: + cellCostFunction = new HubSpotCellCostFunction(conf); this.candidateGenerators = createCandidateGenerators(); regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf); @@ -273,8 +280,9 @@ protected void loadConf(Configuration conf) { addCostFunction(new StoreFileCostFunction(conf)); // HubSpot addition: - cellCostFunction = new HubSpotCellCostFunction(conf); + if (cellCostFunction.getMultiplier() > 0) { addCostFunction(cellCostFunction); + } loadCustomCostFunctions(conf); From 7b6924730ad99982ddaf57456ea4e5379ee4643a Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 3 Dec 2024 19:53:34 -0500 Subject: [PATCH 100/126] Target a specific capped cell count --- .../HubSpotCellBasedCandidateGenerator.java | 214 +++++++++++++++++- .../balancer/HubSpotCellCostFunction.java | 56 +++-- .../balancer/TestHubSpotCellCostFunction.java | 4 +- 3 files changed, 238 insertions(+), 36 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 9df256d96fb7..7b24fcb12690 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -85,7 +85,144 @@ private BalanceAction generateAction( int targetCellsPerServer = targetRegionsPerServer - numTimesCellRegionsFillAllServers; targetCellsPerServer = Math.min(targetCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + Set serversBelowTarget = new HashSet<>(); + Set serversAboveTarget = new HashSet<>(); + for (int server = 0; server < cluster.numServers; server++) { + int numCellsOnServer = cellGroupSizesPerServer.get(server).keySet().size(); + if (numCellsOnServer < targetCellsPerServer) { + serversBelowTarget.add(server); + } else if (numCellsOnServer > targetCellsPerServer) { + serversAboveTarget.add(server); + } + } + + if (serversBelowTarget.isEmpty() && serversAboveTarget.isEmpty()) { + return actionIfAllServersAtTarget(); + } else if (!serversAboveTarget.isEmpty()) { + return actionIfSomeServersAboveTarget(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer, targetRegionsPerServer); + } else { + return actionIfSomeServersBelowTarget(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer, targetRegionsPerServer); + } + } + + private BalanceAction actionIfSomeServersAboveTarget( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int targetCellsPerServer, + int targetRegionsPerServer + ) { + BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); + + if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { + return moveRegionToUnderloadedServer; + } + + return swapRegionsToDecreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); + } + + private BalanceAction swapRegionsToDecreaseDistinctCellsPerServer( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int targetCellsPerServer + ) { + Optional fromServerMaybe = pickServerWithTooManyCells(cluster, cellGroupSizesPerServer, targetCellsPerServer); + if (!fromServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + int fromServer = fromServerMaybe.get(); + short fromCell = pickLeastFrequentCell(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); + + Optional> toCellMaybe = + pickCellOnServerPresentOnSource(cluster, cellCounts, cellGroupSizesPerServer, fromServer, fromCell); + if (!toCellMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + + short toCell = toCellMaybe.get().getFirst(); + int toServer = toCellMaybe.get().getSecond(); + + return swapCells("swap to decrease", fromServer, fromCell, toServer, toCell, cellGroupSizesPerServer, cluster); + } + + private Optional> pickCellOnServerPresentOnSource( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int fromServer, + short cell + ) { + Map countsForFromServer = cellGroupSizesPerServer.get(fromServer); + Optional> result = Optional.empty(); + + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html + double reservoirRandom = -1; + for (int server = 0; server < cluster.numServers; server++) { + if (server == fromServer) { + continue; + } + + Map countsForToCandidate = cellGroupSizesPerServer.get(server); + Set candidateCellsOnTo = new HashSet<>(); + for (short cellOnTo : countsForToCandidate.keySet()) { + if (countsForFromServer.containsKey(cellOnTo)) { + candidateCellsOnTo.add(cellOnTo); + } + } + + if (!candidateCellsOnTo.isEmpty()) { + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(Pair.newPair(candidateCellsOnTo.stream().findAny().get(), server)); + } + } + } + + return result; + } + + private Optional pickServerWithTooManyCells( + BalancerClusterState cluster, + List> cellGroupSizesPerServer, + int targetCellsPerServer + ) { + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html + Optional result = Optional.empty(); + int highestSoFar = Integer.MIN_VALUE; + double reservoirRandom = -1; + + for (int server = 0; server < cluster.numServers; server++) { + int numCellsOnServer = cellGroupSizesPerServer.get(server).keySet().size(); + if (numCellsOnServer > targetCellsPerServer) { + if (numCellsOnServer > highestSoFar) { + highestSoFar = numCellsOnServer; + reservoirRandom = ThreadLocalRandom.current().nextDouble(); + result = Optional.of(server); + } else if (numCellsOnServer == highestSoFar) { + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(server); + } + } + } + } + + return result; + } + + private BalanceAction actionIfSomeServersBelowTarget( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int targetCellsPerServer, + int targetRegionsPerServer + ) { BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { @@ -95,6 +232,10 @@ private BalanceAction generateAction( return swapRegionsToIncreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); } + private BalanceAction actionIfAllServersAtTarget() { + return BalanceAction.NULL_ACTION; + } + private BalanceAction tryMoveRegionToSomeUnderloadedServer( BalancerClusterState cluster, int[] cellCounts, @@ -112,7 +253,7 @@ private BalanceAction tryMoveRegionToSomeUnderloadedServer( return BalanceAction.NULL_ACTION; } int fromServer = fromServerMaybe.get(); - short cell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); + short cell = pickMostFrequentCell(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); return moveCell("fill underloaded", fromServer, cell, toServer, cellGroupSizesPerServer, cluster); } @@ -148,7 +289,7 @@ private BalanceAction swapRegionsToIncreaseDistinctCellsPerServer( return BalanceAction.NULL_ACTION; } int fromServer = fromServerMaybe.get(); - short fromCell = pickCellToMove(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); + short fromCell = pickMostFrequentCell(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); Optional> toCellMaybe = pickCellOnServerNotPresentOnSource(cluster, cellCounts, cellGroupSizesPerServer, fromServer, fromCell); if (!toCellMaybe.isPresent()) { @@ -158,7 +299,7 @@ private BalanceAction swapRegionsToIncreaseDistinctCellsPerServer( short toCell = toCellMaybe.get().getFirst(); int toServer = toCellMaybe.get().getSecond(); - return swapCells("swap", fromServer, fromCell, toServer, toCell, cellGroupSizesPerServer, cluster); + return swapCells("swap to increase", fromServer, fromCell, toServer, toCell, cellGroupSizesPerServer, cluster); } private Optional> pickCellOnServerNotPresentOnSource( @@ -234,15 +375,66 @@ private Optional pickServerWithoutEnoughIsolation( return result; } - private short pickCellToMove(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { - return cellCountsForServer.keySet().stream() - .max(Comparator.comparing(cell -> { - int regionsForCell = cellCounts[cell]; - int expectedCountOnAllServers = Ints.checkedCast((long) Math.floor((double) regionsForCell / cluster.numServers)); + private short pickMostFrequentCell(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { + List cellsOrderedLeastToMostFrequent = getCellsOrderedLeastToMostFrequent(cluster, cellCounts, cellCountsForServer); + + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html + Optional result = Optional.of(cellsOrderedLeastToMostFrequent.get(cellsOrderedLeastToMostFrequent.size() - 1)); + int highestSoFar = cellCountsForServer.get(cellsOrderedLeastToMostFrequent.get(cellsOrderedLeastToMostFrequent.size() - 1)); + double reservoirRandom = ThreadLocalRandom.current().nextDouble(); + + for (int cellIndex = cellsOrderedLeastToMostFrequent.size() - 2; cellIndex >= 0; cellIndex--) { + short cell = cellsOrderedLeastToMostFrequent.get(cellIndex); + int numInstancesOfCell = cellCountsForServer.get(cell); + if (numInstancesOfCell < highestSoFar) { + break; + } + + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(cell); + } + } + + return result.get(); + } + + private short pickLeastFrequentCell(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { + List cellsOrderedLeastToMostFrequent = getCellsOrderedLeastToMostFrequent(cluster, cellCounts, cellCountsForServer); + + // randomly select one using a simplified inline reservoir sample + // See: http://gregable.com/2007/10/reservoir-sampling.html + Optional result = Optional.of(cellsOrderedLeastToMostFrequent.get(0)); + int lowestSoFar = cellCountsForServer.get(cellsOrderedLeastToMostFrequent.get(0)); + double reservoirRandom = ThreadLocalRandom.current().nextDouble(); + + for (int cellIndex = 1; cellIndex < cellsOrderedLeastToMostFrequent.size(); cellIndex++) { + short cell = cellsOrderedLeastToMostFrequent.get(cellIndex); + int numInstancesOfCell = cellCountsForServer.get(cell); + if (numInstancesOfCell > lowestSoFar) { + break; + } + + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (candidateRandom > reservoirRandom) { + reservoirRandom = candidateRandom; + result = Optional.of(cell); + } + } + + return result.get(); + } + + private List getCellsOrderedLeastToMostFrequent(BalancerClusterState cluster, int[] cellCounts, Map cellCountsForServer) { + return cellCountsForServer.keySet().stream().sorted(Comparator.comparing(cell -> { + int regionsForCell = cellCounts[cell]; + int expectedCountOnAllServers = + Ints.checkedCast((long) Math.floor((double) regionsForCell / cluster.numServers)); - return cellCountsForServer.get(cell) - expectedCountOnAllServers; - })) - .get(); + return cellCountsForServer.get(cell) - expectedCountOnAllServers; + })).collect(Collectors.toList()); } private MoveRegionAction moveCell( diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index a21d9f11d10d..41962d685568 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -62,7 +62,10 @@ public class HubSpotCellCostFunction extends CostFunction { private boolean[][] serverHasCell; private Int2IntCounterMap regionCountByCell; - private int numRegionCellsOverassigned; + + private int maxAcceptableCellsPerServer; + + private int numServerCellsOutsideDesiredBand; private double cost; HubSpotCellCostFunction(Configuration conf) { @@ -99,7 +102,6 @@ && isNeeded() this.serverHasCell = new boolean[numServers][numCells]; int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); this.regionCountByCell = new Int2IntCounterMap(HubSpotCellUtilities.MAX_CELL_COUNT, 0.5f, 0); Arrays.stream(cluster.regions) @@ -111,16 +113,19 @@ && isNeeded() } bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; + bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + this.maxAcceptableCellsPerServer = bestCaseMaxCellsPerServer; - this.numRegionCellsOverassigned = - calculateCurrentCellCost( + this.numServerCellsOutsideDesiredBand = + calculateCurrentCountOfCellsOutsideDesiredBand( numCells, - numServers, bestCaseMaxCellsPerServer, + numServers, + maxAcceptableCellsPerServer, regions, regionIndexToServerIndex, serverHasCell, super.cluster::getRegionSizeMB ); - this.cost = (double) this.numRegionCellsOverassigned / (bestCaseMaxCellsPerServer * cluster.numServers); + this.cost = (double) this.numServerCellsOutsideDesiredBand / (cluster.numRegions); if (regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") @@ -176,31 +181,37 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { ); } - int changeInOverassignedRegionCells = 0; + int changeInRegionCellsOutsideDesiredBand = 0; for (short movingCell : cellsOnRegion) { // this is invoked AFTER the region has been moved boolean didMoveDecreaseCellsOnOldServer = !numRegionsForCellOnOldServer.containsKey(movingCell); boolean didMoveIncreaseCellsOnNewServer = numRegionsForCellOnNewServer.get(movingCell) == 1; - if (didMoveDecreaseCellsOnOldServer) { - changeInOverassignedRegionCells++; - serverHasCell[oldServer][movingCell] = false; - } - if (didMoveIncreaseCellsOnNewServer) { - changeInOverassignedRegionCells--; serverHasCell[newServer][movingCell] = true; + if (currentCellCountNewServer <= maxAcceptableCellsPerServer) { + changeInRegionCellsOutsideDesiredBand--; + } else { + changeInRegionCellsOutsideDesiredBand++; + } + } + if (didMoveDecreaseCellsOnOldServer) { + serverHasCell[oldServer][movingCell] = false; + if (currentCellCountOldServer >= maxAcceptableCellsPerServer) { + changeInRegionCellsOutsideDesiredBand--; + } else { + changeInRegionCellsOutsideDesiredBand++; + } } } if (LOG.isDebugEnabled()) { - LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, changeInOverassignedRegionCells); + LOG.debug("Move cost delta for s{}.r{} --> s{} is {}", oldServer, region, newServer, changeInRegionCellsOutsideDesiredBand); } - numRegionCellsOverassigned += changeInOverassignedRegionCells; + this.numServerCellsOutsideDesiredBand += changeInRegionCellsOutsideDesiredBand; - int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); - this.cost = (double) this.numRegionCellsOverassigned / (bestCaseMaxCellsPerServer * cluster.numServers); + this.cost = (double) this.numServerCellsOutsideDesiredBand / (maxAcceptableCellsPerServer * cluster.numServers); } private Map computeCellFrequencyForServer(int server) { @@ -272,17 +283,17 @@ protected double cost() { return cost; } - static int calculateCurrentCellCost( + static int calculateCurrentCountOfCellsOutsideDesiredBand( short numCells, int numServers, - int bestCaseMaxCellsPerServer, + int maxAcceptableCellsPerServer, RegionInfo[] regions, int[] regionLocations, boolean[][] serverHasCell, Function getRegionSizeMbFunc ) { - Preconditions.checkState(bestCaseMaxCellsPerServer > 0, - "Best case max cells per server must be > 0"); + Preconditions.checkState(maxAcceptableCellsPerServer > 0, + "Max cells per server must be > 0"); if (LOG.isTraceEnabled()) { Set tableAndNamespace = Arrays.stream(regions).map(RegionInfo::getTable) @@ -321,8 +332,7 @@ static int calculateCurrentCellCost( cellsOnThisServer++; } } - - int costForThisServer = Math.max(bestCaseMaxCellsPerServer - cellsOnThisServer, 0); + int costForThisServer = Math.abs(cellsOnThisServer - maxAcceptableCellsPerServer); if (LOG.isDebugEnabled()) { debugBuilder.append(server).append("=").append(costForThisServer).append(", "); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java index d1e0821cc14f..ac34e5d4400e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestHubSpotCellCostFunction.java @@ -106,7 +106,7 @@ public void testCellCountBothEndsNull() { @Test public void testCostBalanced() { // 4 cells, 4 servers, perfectly balanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost + int cost = HubSpotCellCostFunction.calculateCurrentCountOfCellsOutsideDesiredBand ((short) 4, 4, 1, @@ -127,7 +127,7 @@ public void testCostBalanced() { @Test public void testCostImbalanced() { // 4 cells, 4 servers, imbalanced - int cost = HubSpotCellCostFunction.calculateCurrentCellCost( + int cost = HubSpotCellCostFunction.calculateCurrentCountOfCellsOutsideDesiredBand( (short) 4, 4, 1, From 6b2df75796ddd55a57274d6c1db8e9e1f2907550 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 3 Dec 2024 23:46:32 -0500 Subject: [PATCH 101/126] Simplify when we fill underloaded --- .../HubSpotCellBasedCandidateGenerator.java | 53 +++++-------------- 1 file changed, 14 insertions(+), 39 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 7b24fcb12690..23e120435880 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -77,6 +77,17 @@ private BalanceAction generateAction( int targetRegionsPerServer = Ints.checkedCast( (long) Math.floor((double) cluster.numRegions / cluster.numServers)); + BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer( + cluster, + cellCounts, + cellGroupSizesPerServer, + targetRegionsPerServer + ); + + if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { + return moveRegionToUnderloadedServer; + } + int numTimesCellRegionsFillAllServers = 0; for (int cell = 0; cell < HubSpotCellUtilities.MAX_CELL_COUNT; cell++) { int numRegionsForCell = cellCounts[cell]; @@ -98,30 +109,14 @@ private BalanceAction generateAction( } if (serversBelowTarget.isEmpty() && serversAboveTarget.isEmpty()) { - return actionIfAllServersAtTarget(); + return BalanceAction.NULL_ACTION; } else if (!serversAboveTarget.isEmpty()) { - return actionIfSomeServersAboveTarget(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer, targetRegionsPerServer); + return swapRegionsToDecreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); } else { - return actionIfSomeServersBelowTarget(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer, targetRegionsPerServer); + return swapRegionsToIncreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); } } - private BalanceAction actionIfSomeServersAboveTarget( - BalancerClusterState cluster, - int[] cellCounts, - List> cellGroupSizesPerServer, - int targetCellsPerServer, - int targetRegionsPerServer - ) { - BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); - - if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { - return moveRegionToUnderloadedServer; - } - - return swapRegionsToDecreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); - } - private BalanceAction swapRegionsToDecreaseDistinctCellsPerServer( BalancerClusterState cluster, int[] cellCounts, @@ -216,26 +211,6 @@ private Optional pickServerWithTooManyCells( return result; } - private BalanceAction actionIfSomeServersBelowTarget( - BalancerClusterState cluster, - int[] cellCounts, - List> cellGroupSizesPerServer, - int targetCellsPerServer, - int targetRegionsPerServer - ) { - BalanceAction moveRegionToUnderloadedServer = tryMoveRegionToSomeUnderloadedServer(cluster, cellCounts, cellGroupSizesPerServer, targetRegionsPerServer); - - if (moveRegionToUnderloadedServer != BalanceAction.NULL_ACTION) { - return moveRegionToUnderloadedServer; - } - - return swapRegionsToIncreaseDistinctCellsPerServer(cluster, cellCounts, cellGroupSizesPerServer, targetCellsPerServer); - } - - private BalanceAction actionIfAllServersAtTarget() { - return BalanceAction.NULL_ACTION; - } - private BalanceAction tryMoveRegionToSomeUnderloadedServer( BalancerClusterState cluster, int[] cellCounts, From 4cae482c10a1dda62b7d754055d0a4b6ff624663 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 3 Dec 2024 23:48:24 -0500 Subject: [PATCH 102/126] include target --- .../master/balancer/HubSpotCellBasedCandidateGenerator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 23e120435880..383bc54f1214 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -230,7 +230,7 @@ private BalanceAction tryMoveRegionToSomeUnderloadedServer( int fromServer = fromServerMaybe.get(); short cell = pickMostFrequentCell(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); - return moveCell("fill underloaded", fromServer, cell, toServer, cellGroupSizesPerServer, cluster); + return moveCell("fill underloaded - target = " + targetRegionsPerServer, fromServer, cell, toServer, cellGroupSizesPerServer, cluster); } private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { From 28b0271a603e8537f47c4c9175f7aa7cb0d852a5 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 09:35:58 -0500 Subject: [PATCH 103/126] Emit which generator --- .../hbase/master/balancer/StochasticLoadBalancer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 9fde32db6926..cc8b10052ae6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -448,7 +448,11 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") BalanceAction nextAction(BalancerClusterState cluster) { - return getRandomGenerator().generate(cluster); + CandidateGenerator generator = getRandomGenerator(); + if (LOG.isTraceEnabled()) { + LOG.trace("Using generator {}", generator.getClass().getSimpleName()); + } + return generator.generate(cluster); } /** From f8085d5c998c85529308e2bf497d12be6d8f97b5 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 09:43:45 -0500 Subject: [PATCH 104/126] randomize the under-/overloaded server picked --- .../HubSpotCellBasedCandidateGenerator.java | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index 383bc54f1214..a2f2e1d529aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -234,23 +234,43 @@ private BalanceAction tryMoveRegionToSomeUnderloadedServer( } private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + Optional pickedServer = Optional.empty(); + double reservoirRandom = -1; + for (int server = 0; server < cluster.numServers; server++) { if (cluster.regionsPerServer[server].length > targetRegionsPerServer) { - return Optional.of(server); + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (!pickedServer.isPresent()) { + pickedServer = Optional.of(server); + reservoirRandom = candidateRandom; + } else if (candidateRandom > reservoirRandom) { + pickedServer = Optional.of(server); + reservoirRandom = candidateRandom; + } } } - return Optional.empty(); + return pickedServer; } private Optional pickUnderloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + Optional pickedServer = Optional.empty(); + double reservoirRandom = -1; + for (int server = 0; server < cluster.numServers; server++) { if (cluster.regionsPerServer[server].length < targetRegionsPerServer) { - return Optional.of(server); + double candidateRandom = ThreadLocalRandom.current().nextDouble(); + if (!pickedServer.isPresent()) { + pickedServer = Optional.of(server); + reservoirRandom = candidateRandom; + } else if (candidateRandom > reservoirRandom) { + pickedServer = Optional.of(server); + reservoirRandom = candidateRandom; + } } } - return Optional.empty(); + return pickedServer; } private BalanceAction swapRegionsToIncreaseDistinctCellsPerServer( From e8361dfbe1ed5ff4f093d774dfb9cf2f62745b7d Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 09:55:48 -0500 Subject: [PATCH 105/126] Mark if we keep or reject --- .../master/balancer/StochasticLoadBalancer.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index cc8b10052ae6..9dd23399d090 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -596,19 +596,24 @@ protected List balanceTable(TableName tableName, newCost = computeCost(cluster, currentCost); - if(LOG.isTraceEnabled()) { - LOG.trace("S[{}]: {} -> {} via {} -- {}", - step, currentCost, newCost, action, totalCostsPerFunc()); - } - // Should this be kept? if (newCost < currentCost) { + if(LOG.isTraceEnabled()) { + LOG.trace(" S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); + } + currentCost = newCost; // save for JMX curOverallCost = currentCost; System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); } else { + if(LOG.isTraceEnabled()) { + LOG.trace(" S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); + } + // Put things back the way they were before. // TODO: undo by remembering old values BalanceAction undoAction = action.undoAction(); From ca45b367459f3af497c965128f911e3187e668be Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 10:02:58 -0500 Subject: [PATCH 106/126] Print region counts --- .../hbase/master/balancer/HubSpotCellCostFunction.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 41962d685568..396243710996 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -167,16 +167,20 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { Map numRegionsForCellOnNewServer = computeCellFrequencyForServer(newServer); int currentCellCountOldServer = numRegionsForCellOnOldServer.keySet().size(); + int currentRegionCountOldServer = numRegionsForCellOnOldServer.values().stream().mapToInt(Integer::intValue).sum() + 1; int currentCellCountNewServer = numRegionsForCellOnNewServer.keySet().size(); + int currentRegionCountNewServer = numRegionsForCellOnNewServer.values().stream().mapToInt(Integer::intValue).sum() - 1; if (LOG.isDebugEnabled()) { LOG.debug( - "Old server {} [{}] has cell frequency of {}.\n\nNew server {} [{}] has cell frequency of {}.", + "Old server {} [{} cells, {} regions] has cell frequency of {}.\n\nNew server {} [{} cells, {} regions] has cell frequency of {}.", oldServer, currentCellCountOldServer, + currentRegionCountOldServer, numRegionsForCellOnOldServer, newServer, currentCellCountNewServer, + currentRegionCountNewServer, numRegionsForCellOnNewServer ); } From b8ac3838ebf9e8bdd4179a664285a91c12a7e21c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 13:57:33 -0500 Subject: [PATCH 107/126] Prioritize balance by region and THEN evening out cell isolation --- .../HubSpotCellBasedCandidateGenerator.java | 83 +++++++++++++++---- .../balancer/HubSpotCellCostFunction.java | 43 ++++++++-- 2 files changed, 103 insertions(+), 23 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index a2f2e1d529aa..ede3f1324a81 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -88,6 +88,17 @@ private BalanceAction generateAction( return moveRegionToUnderloadedServer; } + BalanceAction moveRegionFromOverloadedServer = tryMoveRegionFromSomeOverloadedServer( + cluster, + cellCounts, + cellGroupSizesPerServer, + targetRegionsPerServer + ); + + if (moveRegionFromOverloadedServer != BalanceAction.NULL_ACTION) { + return moveRegionFromOverloadedServer; + } + int numTimesCellRegionsFillAllServers = 0; for (int cell = 0; cell < HubSpotCellUtilities.MAX_CELL_COUNT; cell++) { int numRegionsForCell = cellCounts[cell]; @@ -117,6 +128,28 @@ private BalanceAction generateAction( } } + private BalanceAction tryMoveRegionFromSomeOverloadedServer( + BalancerClusterState cluster, + int[] cellCounts, + List> cellGroupSizesPerServer, + int targetRegionsPerServer + ) { + Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer, ComparisonMode.ALLOW_OFF_BY_ONE); + if (!fromServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + + int fromServer = fromServerMaybe.get(); + Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer, ComparisonMode.ALLOW_OFF_BY_ONE); + if (!toServerMaybe.isPresent()) { + return BalanceAction.NULL_ACTION; + } + int toServer = toServerMaybe.get(); + short cell = pickMostFrequentCell(cluster, cellCounts, cellGroupSizesPerServer.get(fromServer)); + + return moveCell("evacuate overloaded - target = " + targetRegionsPerServer, fromServer, cell, toServer, cellGroupSizesPerServer, cluster); + } + private BalanceAction swapRegionsToDecreaseDistinctCellsPerServer( BalancerClusterState cluster, int[] cellCounts, @@ -163,7 +196,7 @@ private Optional> pickCellOnServerPresentOnSource( Map countsForToCandidate = cellGroupSizesPerServer.get(server); Set candidateCellsOnTo = new HashSet<>(); for (short cellOnTo : countsForToCandidate.keySet()) { - if (countsForFromServer.containsKey(cellOnTo)) { + if (cellOnTo != cell && countsForFromServer.containsKey(cellOnTo)) { candidateCellsOnTo.add(cellOnTo); } } @@ -217,13 +250,13 @@ private BalanceAction tryMoveRegionToSomeUnderloadedServer( List> cellGroupSizesPerServer, int targetRegionsPerServer ) { - Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer); + Optional toServerMaybe = pickUnderloadedServer(cluster, targetRegionsPerServer, ComparisonMode.STRICT); if (!toServerMaybe.isPresent()) { return BalanceAction.NULL_ACTION; } int toServer = toServerMaybe.get(); - Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer); + Optional fromServerMaybe = pickOverloadedServer(cluster, targetRegionsPerServer, ComparisonMode.STRICT); if (!fromServerMaybe.isPresent()) { return BalanceAction.NULL_ACTION; } @@ -233,17 +266,32 @@ private BalanceAction tryMoveRegionToSomeUnderloadedServer( return moveCell("fill underloaded - target = " + targetRegionsPerServer, fromServer, cell, toServer, cellGroupSizesPerServer, cluster); } - private Optional pickOverloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + enum ComparisonMode { + STRICT, + ALLOW_OFF_BY_ONE + } + + private Optional pickOverloadedServer( + BalancerClusterState cluster, + int targetRegionsPerServer, + ComparisonMode mode + ) { + int[][] regionsPerServer = cluster.regionsPerServer; Optional pickedServer = Optional.empty(); + int mostRegionsPerServerSoFar = Integer.MIN_VALUE; double reservoirRandom = -1; + int target = targetRegionsPerServer + (mode == ComparisonMode.STRICT ? 0 : 1); for (int server = 0; server < cluster.numServers; server++) { - if (cluster.regionsPerServer[server].length > targetRegionsPerServer) { + int[] regions = regionsPerServer[server]; + int numRegionsOnServer = regions.length; + if (numRegionsOnServer > target) { double candidateRandom = ThreadLocalRandom.current().nextDouble(); - if (!pickedServer.isPresent()) { + if (numRegionsOnServer > mostRegionsPerServerSoFar) { pickedServer = Optional.of(server); reservoirRandom = candidateRandom; - } else if (candidateRandom > reservoirRandom) { + mostRegionsPerServerSoFar = numRegionsOnServer; + } else if (numRegionsOnServer == mostRegionsPerServerSoFar && candidateRandom > reservoirRandom) { pickedServer = Optional.of(server); reservoirRandom = candidateRandom; } @@ -253,12 +301,17 @@ private Optional pickOverloadedServer(BalancerClusterState cluster, int return pickedServer; } - private Optional pickUnderloadedServer(BalancerClusterState cluster, int targetRegionsPerServer) { + private Optional pickUnderloadedServer( + BalancerClusterState cluster, + int targetRegionsPerServer, + ComparisonMode mode + ) { Optional pickedServer = Optional.empty(); double reservoirRandom = -1; + int target = targetRegionsPerServer + (mode == ComparisonMode.STRICT ? 0 : 1); for (int server = 0; server < cluster.numServers; server++) { - if (cluster.regionsPerServer[server].length < targetRegionsPerServer) { + if (cluster.regionsPerServer[server].length < target) { double candidateRandom = ThreadLocalRandom.current().nextDouble(); if (!pickedServer.isPresent()) { pickedServer = Optional.of(server); @@ -444,13 +497,13 @@ private MoveRegionAction moveCell( Map toCounts = cellGroupSizesPerServer.get(toServer); String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + - fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "")) + fromCounts.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> (entry.getKey() == fromCell ? "<<" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? ">>" : "")) .collect(Collectors.joining(", ", "{", "}")); String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + - toCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "!!" : "")) + toCounts.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> (entry.getKey() == fromCell ? ">>" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "<<" : "")) .collect(Collectors.joining(", ", "{", "}")); - LOG.debug("{}", String.format("[%20s]\t\tmove %d:%d -> %d %s -> %s\n", + LOG.debug("{}", String.format("[%20s]\t\tmove %d:%d -> %d\n\t %s\n\t-> %s\n", originStep, fromServer, fromCell, toServer, fromCountsString, toCountsString)); @@ -471,13 +524,13 @@ private SwapRegionsAction swapCells( Map toCounts = cellGroupSizesPerServer.get(toServer); String fromCountsString = fromCounts.values().stream().mapToInt(x -> x).sum() + "." + - fromCounts.entrySet().stream().map(entry -> (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? "**" : "") + (entry.getKey() == toCell ? "!!" : "")) + fromCounts.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> (entry.getKey() == fromCell ? "<<" : "") + (entry.getKey() == toCell ? ">>" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == fromCell ? ">>" : "") + (entry.getKey() == toCell ? "<<" : "")) .collect(Collectors.joining(", ", "{", "}")); String toCountsString = toCounts.values().stream().mapToInt(x -> x).sum() + "." + - toCounts.entrySet().stream().map(entry -> (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == toCell ? "**" : "") + (entry.getKey() == fromCell ? "!!" : "")) + toCounts.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(entry -> (entry.getKey() == toCell ? "<<" : "") + (entry.getKey() == fromCell ? ">>" : "") + entry.getKey() + "=" + entry.getValue() + (entry.getKey() == toCell ? ">>" : "") + (entry.getKey() == fromCell ? "<<" : "")) .collect(Collectors.joining(", ", "{", "}")); - LOG.debug("{}", String.format("[%20s]\t\tswap %3d:%3d <-> %3d:%3d %s <-> %s\n", + LOG.debug("{}", String.format("[%20s]\t\tswap %3d:%3d <-> %3d:%3d \n\t %s\n\t<-> %s\n", originStep, fromServer, fromCell, toServer, toCell, fromCountsString, toCountsString)); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 396243710996..7631d244433b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -23,6 +23,8 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; @@ -64,8 +66,11 @@ public class HubSpotCellCostFunction extends CostFunction { private Int2IntCounterMap regionCountByCell; private int maxAcceptableCellsPerServer; + private int balancedRegionsPerServer; private int numServerCellsOutsideDesiredBand; + private boolean[] serverIsBalanced; + private int numServersUnbalanced; private double cost; HubSpotCellCostFunction(Configuration conf) { @@ -115,17 +120,26 @@ && isNeeded() bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); this.maxAcceptableCellsPerServer = bestCaseMaxCellsPerServer; + this.balancedRegionsPerServer = Ints.checkedCast( + (long) Math.floor((double) cluster.numRegions / cluster.numServers)); + this.serverIsBalanced = new boolean[cluster.numServers]; + IntStream.range(0, cluster.numServers) + .forEach(server -> serverIsBalanced[server] = isBalanced(server)); + this.numServersUnbalanced = + Ints.checkedCast(IntStream.range(0, cluster.numServers).filter(server -> !serverIsBalanced[server]).count()); this.numServerCellsOutsideDesiredBand = calculateCurrentCountOfCellsOutsideDesiredBand( numCells, numServers, maxAcceptableCellsPerServer, - regions, regionIndexToServerIndex, + regions, + regionIndexToServerIndex, serverHasCell, super.cluster::getRegionSizeMB ); - this.cost = (double) this.numServerCellsOutsideDesiredBand / (cluster.numRegions); + + recomputeCost(); if (regions.length > 0 && regions[0].getTable().getNamespaceAsString().equals("default") @@ -149,11 +163,22 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { && currentClusterState.regions.length > 0; } + private boolean isBalanced(int server) { + return cluster.regionsPerServer[server].length >= balancedRegionsPerServer && cluster.regionsPerServer[server].length <= balancedRegionsPerServer + 1; + } + @Override protected void regionMoved(int region, int oldServer, int newServer) { RegionInfo movingRegion = regions[region]; Set cellsOnRegion = HubSpotCellUtilities.toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); + boolean isOldServerBalanced = isBalanced(oldServer); + this.serverIsBalanced[oldServer] = isOldServerBalanced; + boolean isNewServerBalanced = isBalanced(newServer); + this.serverIsBalanced[newServer] = isNewServerBalanced; + this.numServersUnbalanced = + Ints.checkedCast(IntStream.range(0, cluster.numServers).filter(server -> !serverIsBalanced[server]).count()); + if (LOG.isDebugEnabled()) { LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", region, @@ -167,20 +192,16 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { Map numRegionsForCellOnNewServer = computeCellFrequencyForServer(newServer); int currentCellCountOldServer = numRegionsForCellOnOldServer.keySet().size(); - int currentRegionCountOldServer = numRegionsForCellOnOldServer.values().stream().mapToInt(Integer::intValue).sum() + 1; int currentCellCountNewServer = numRegionsForCellOnNewServer.keySet().size(); - int currentRegionCountNewServer = numRegionsForCellOnNewServer.values().stream().mapToInt(Integer::intValue).sum() - 1; if (LOG.isDebugEnabled()) { LOG.debug( - "Old server {} [{} cells, {} regions] has cell frequency of {}.\n\nNew server {} [{} cells, {} regions] has cell frequency of {}.", + "Old server {} [{} cells] has cell frequency of {}.\n\nNew server {} [{} cells] has cell frequency of {}.", oldServer, currentCellCountOldServer, - currentRegionCountOldServer, numRegionsForCellOnOldServer, newServer, currentCellCountNewServer, - currentRegionCountNewServer, numRegionsForCellOnNewServer ); } @@ -214,8 +235,14 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { } this.numServerCellsOutsideDesiredBand += changeInRegionCellsOutsideDesiredBand; + recomputeCost(); + } - this.cost = (double) this.numServerCellsOutsideDesiredBand / (maxAcceptableCellsPerServer * cluster.numServers); + private void recomputeCost() { + double newCost = + (double) numServerCellsOutsideDesiredBand / (maxAcceptableCellsPerServer * cluster.numServers) + + numServersUnbalanced; + cost = newCost; } private Map computeCellFrequencyForServer(int server) { From 5a42bb374601d883142c60822b8b632d7d503529 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 4 Dec 2024 21:38:54 -0500 Subject: [PATCH 108/126] Add guard in case of error computing online cost, and do a deep reset if it goes negative --- .../balancer/HubSpotCellCostFunction.java | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 7631d244433b..75500375b537 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -105,6 +105,20 @@ && isNeeded() } } + computeCostFromScratch(); + + if (regions.length > 0 + && regions[0].getTable().getNamespaceAsString().equals("default") + ) { + if (LOG.isDebugEnabled()) { + LOG.debug("Evaluated (cost={})", String.format("%.4f", this.cost)); + } else if (LOG.isTraceEnabled()) { + LOG.trace("Evaluated (cost={}) {}", String.format("%.4f", this.cost), snapshotState()); + } + } + } + + private void computeCostFromScratch() { this.serverHasCell = new boolean[numServers][numCells]; int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); @@ -140,16 +154,6 @@ && isNeeded() ); recomputeCost(); - - if (regions.length > 0 - && regions[0].getTable().getNamespaceAsString().equals("default") - ) { - if (LOG.isDebugEnabled()) { - LOG.debug("Evaluated (cost={})", String.format("%.4f", this.cost)); - } else if (LOG.isTraceEnabled()) { - LOG.trace("Evaluated (cost={}) {}", String.format("%.4f", this.cost), snapshotState()); - } - } } @Override boolean isNeeded() { @@ -164,7 +168,8 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { } private boolean isBalanced(int server) { - return cluster.regionsPerServer[server].length >= balancedRegionsPerServer && cluster.regionsPerServer[server].length <= balancedRegionsPerServer + 1; + return cluster.regionsPerServer[server].length >= balancedRegionsPerServer + && cluster.regionsPerServer[server].length <= balancedRegionsPerServer + 1; } @Override protected void regionMoved(int region, int oldServer, int newServer) { @@ -236,6 +241,12 @@ private boolean isBalanced(int server) { this.numServerCellsOutsideDesiredBand += changeInRegionCellsOutsideDesiredBand; recomputeCost(); + + if (cost < 0.0) { + double negativeCost = cost; + computeCostFromScratch(); + LOG.warn("Cost went negative - recomputed from scratch. Adjusted from {} to {}", negativeCost, cost); + } } private void recomputeCost() { From b685173b3301092737af2e543ef09f16d75a6b68 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:39:14 -0500 Subject: [PATCH 109/126] Clean MutableRegionInfo --- .../hbase/client/MutableRegionInfo.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index 81e6d478b79d..a9382f3a9bed 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -23,7 +23,6 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -55,17 +54,17 @@ class MutableRegionInfo implements RegionInfo { // zookeeper as of 0.90.0 HBase. And now in DisableTableProcedure, finally we will create bunch // of UnassignProcedures and at the last of the procedure we will set the region state to // CLOSED, and will not change the offLine flag. - @Expose private boolean offLine; - @Expose private boolean split; - @Expose private final long regionId; - @Expose private final int replicaId; - @Expose private final byte[] regionName; - @Expose private final byte[] startKey; - @Expose private final byte[] endKey; - @Expose private final int hashCode; - @Expose private final String encodedName; - @Expose private final byte[] encodedNameAsBytes; - @Expose private final TableName tableName; + private boolean offLine; + private boolean split; + private final long regionId; + private final int replicaId; + private final byte[] regionName; + private final byte[] startKey; + private final byte[] endKey; + private final int hashCode; + private final String encodedName; + private final byte[] encodedNameAsBytes; + private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, final byte[] endKey, final long regionId, final int replicaId, boolean offLine, From cf40b932b76fd22c7722a600572d839132873782 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:39:54 -0500 Subject: [PATCH 110/126] Clean up ServerName --- .../src/main/java/org/apache/hadoop/hbase/ServerName.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 9f32e64b9ff4..baf0968dcc80 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -26,9 +26,7 @@ import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; - import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.collect.Interner; import org.apache.hbase.thirdparty.com.google.common.collect.Interners; @@ -83,15 +81,15 @@ public class ServerName implements Comparable, Serializable { */ public static final String UNKNOWN_SERVERNAME = "#unknown#"; - @Expose private final String serverName; - @Expose private final long startCode; + private final String serverName; + private final long startCode; private transient Address address; /** * Cached versioned bytes of this ServerName instance. * @see #getVersionedBytes() */ - @Expose private byte[] bytes; + private byte[] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList<>(0); /** From b7f108a06314c0301086f61186d84c5529141e21 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:40:21 -0500 Subject: [PATCH 111/126] Clean up TableName --- .../org/apache/hadoop/hbase/TableName.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index 0b077b724786..855daa560612 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -24,9 +24,7 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; - import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; /** @@ -94,14 +92,14 @@ public static boolean isMetaTableName(final TableName tn) { */ public static final TableName OLD_META_TABLE_NAME = getADummyTableName(OLD_META_STR); - @Expose private final byte[] name; - @Expose private final String nameAsString; - @Expose private final byte[] namespace; - @Expose private final String namespaceAsString; - @Expose private final byte[] qualifier; - @Expose private final String qualifierAsString; - @Expose private final boolean systemTable; - @Expose private final int hashCode; + private final byte[] name; + private final String nameAsString; + private final byte[] namespace; + private final String namespaceAsString; + private final byte[] qualifier; + private final String qualifierAsString; + private final boolean systemTable; + private final int hashCode; /** * Check passed byte array, "tableName", is legal user-space table name. From 2a22aea62dd1a64550cb90aeb8df019a39786efd Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:40:44 -0500 Subject: [PATCH 112/126] Clean up Address --- .../src/main/java/org/apache/hadoop/hbase/net/Address.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index 5b35bfbd0edb..5397352868cf 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -21,9 +21,7 @@ import java.util.Iterator; import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; - import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.net.HostAndPort; @@ -38,7 +36,7 @@ */ @InterfaceAudience.Public public class Address implements Comparable
{ - @Expose private final HostAndPort hostAndPort; + private final HostAndPort hostAndPort; private Address(HostAndPort hostAndPort) { this.hostAndPort = hostAndPort; From d87742d93227c5ec94aa19d74b5a825c8b222e4c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:42:00 -0500 Subject: [PATCH 113/126] Clean up BalancerClusterState --- .../master/balancer/BalancerClusterState.java | 99 +++++++++---------- 1 file changed, 48 insertions(+), 51 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index de1cb5793017..2428bb3e2fff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -53,68 +53,68 @@ class BalancerClusterState { private static final Logger LOG = LoggerFactory.getLogger(BalancerClusterState.class); - @Expose ServerName[] servers; + ServerName[] servers; // ServerName uniquely identifies a region server. multiple RS can run on the same host - @Expose String[] hosts; - @Expose String[] racks; - @Expose boolean multiServersPerHost = false; // whether or not any host has more than one server + String[] hosts; + String[] racks; + boolean multiServersPerHost = false; // whether or not any host has more than one server - @Expose ArrayList tables; - @Expose RegionInfo[] regions; - @Expose Deque[] regionLoads; + ArrayList tables; + RegionInfo[] regions; + Deque[] regionLoads; private RegionLocationFinder regionFinder; - @Expose int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality + int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality - @Expose int[] serverIndexToHostIndex; // serverIndex -> host index - @Expose int[] serverIndexToRackIndex; // serverIndex -> rack index + int[] serverIndexToHostIndex; // serverIndex -> host index + int[] serverIndexToRackIndex; // serverIndex -> rack index - @Expose int[][] regionsPerServer; // serverIndex -> region list - @Expose int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list - @Expose int[][] regionsPerHost; // hostIndex -> list of regions - @Expose int[][] regionsPerRack; // rackIndex -> region list - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated + int[][] regionsPerServer; // serverIndex -> region list + int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list + int[][] regionsPerHost; // hostIndex -> list of regions + int[][] regionsPerRack; // rackIndex -> region list + Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated // replicas by primary region index - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by + Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by // primary region index - @Expose Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by + Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by // primary region index - @Expose int[][] serversPerHost; // hostIndex -> list of server indexes - @Expose int[][] serversPerRack; // rackIndex -> list of server indexes - @Expose int[] regionIndexToServerIndex; // regionIndex -> serverIndex - @Expose int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) - @Expose int[] regionIndexToTableIndex; // regionIndex -> tableIndex - @Expose int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions - @Expose int[] numRegionsPerTable; // tableIndex -> region count - @Expose int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS - @Expose int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary - @Expose boolean hasRegionReplicas = false; // whether there is regions with replicas - - @Expose Integer[] serverIndicesSortedByRegionCount; - @Expose Integer[] serverIndicesSortedByLocality; - - @Expose Map serversToIndex; - @Expose Map hostsToIndex; - @Expose Map racksToIndex; - @Expose Map tablesToIndex; - @Expose Map regionsToIndex; - @Expose float[] localityPerServer; - - @Expose int numServers; - @Expose int numHosts; - @Expose int numRacks; - @Expose int numTables; - @Expose int numRegions; - - @Expose int numMovedRegions = 0; // num moved regions from the initial configuration - @Expose Map> clusterState; + int[][] serversPerHost; // hostIndex -> list of server indexes + int[][] serversPerRack; // rackIndex -> list of server indexes + int[] regionIndexToServerIndex; // regionIndex -> serverIndex + int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) + int[] regionIndexToTableIndex; // regionIndex -> tableIndex + int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions + int[] numRegionsPerTable; // tableIndex -> region count + int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary + boolean hasRegionReplicas = false; // whether there is regions with replicas + + Integer[] serverIndicesSortedByRegionCount; + Integer[] serverIndicesSortedByLocality; + + Map serversToIndex; + Map hostsToIndex; + Map racksToIndex; + Map tablesToIndex; + Map regionsToIndex; + float[] localityPerServer; + + int numServers; + int numHosts; + int numRacks; + int numTables; + int numRegions; + + int numMovedRegions = 0; // num moved regions from the initial configuration + Map> clusterState; private final RackManager rackManager; // Maps region -> rackIndex -> locality of region on rack - @Expose private float[][] rackLocalities; + private float[][] rackLocalities; // Maps localityType -> region -> [server|rack]Index with highest locality - @Expose private int[][] regionsToMostLocalEntities; + private int[][] regionsToMostLocalEntities; static class DefaultRackManager extends RackManager { @Override @@ -408,7 +408,6 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, if (regionFinder != null) { // region location List loc = regionFinder.getTopBlockLocations(region); - LOG.debug("{} is located on {} server", region.getRegionNameAsString(), loc.size()); regionLocations[regionIndex] = new int[loc.size()]; for (int i = 0; i < loc.size(); i++) { regionLocations[regionIndex][i] = loc.get(i) == null @@ -417,8 +416,6 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, ? -1 : serversToIndex.get(loc.get(i).getAddress())); } - } else { - LOG.warn("Region finder is null, not registering region {}", region.getRegionNameAsString()); } } From 0f67a71b24531054f3edce088f2dd9f28a30ab3e Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:42:49 -0500 Subject: [PATCH 114/126] Clean up RegionLocationFinder --- .../hadoop/hbase/master/balancer/BalancerClusterState.java | 1 - .../hadoop/hbase/master/balancer/RegionLocationFinder.java | 5 ----- 2 files changed, 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 2428bb3e2fff..4a9bdfee708a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; -import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index 85b15599e580..cbab031e09d0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -44,7 +44,6 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader; import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache; @@ -245,15 +244,11 @@ protected List getTopBlockLocations(RegionInfo region, String curren * @return ordered list of hosts holding blocks of the specified region */ protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) { - String regionNameAsString = region.getRegionNameAsString(); - LOG.debug("Fetching top block locations for {}", regionNameAsString); try { TableDescriptor tableDescriptor = getTableDescriptor(region.getTable()); if (tableDescriptor != null) { - LOG.debug("Region {} is located on {}", regionNameAsString, tableDescriptor.getTableName().getNameAsString()); HDFSBlocksDistribution blocksDistribution = HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region); - LOG.debug("Top hosts for region {}: {}", regionNameAsString, blocksDistribution.getTopHosts()); return blocksDistribution; } } catch (IOException ioe) { From b425e9a5fe680c5df22e09a34c723e80419cd191 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:43:50 -0500 Subject: [PATCH 115/126] Clean up StochasticLoadBalancer --- .../hbase/master/balancer/StochasticLoadBalancer.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 9dd23399d090..fc8275f90e1b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -448,11 +448,7 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") BalanceAction nextAction(BalancerClusterState cluster) { - CandidateGenerator generator = getRandomGenerator(); - if (LOG.isTraceEnabled()) { - LOG.trace("Using generator {}", generator.getClass().getSimpleName()); - } - return generator.generate(cluster); + return getRandomGenerator().generate(cluster); } /** @@ -478,7 +474,6 @@ protected CandidateGenerator getRandomGenerator() { return candidateGenerators.get(i); } } - return candidateGenerators.get(candidateGenerators.size() - 1); } From d8e9f9a192436e3bc32411607553f1ae2019b94c Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:45:44 -0500 Subject: [PATCH 116/126] More cleanup StochasticLoadBalancer --- .../balancer/StochasticLoadBalancer.java | 28 ++++++------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index fc8275f90e1b..12571ee4e369 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -519,8 +519,6 @@ protected List balanceTable(TableName tableName, || (this.cellCostFunction != null && this.cellCostFunction.getMultiplier() > 0) ) { finder = this.regionFinder; - } else { - LOG.debug("Didn't detect a need for region finder, disabling"); } // The clusterState that is given to this method contains the state @@ -563,17 +561,17 @@ protected List balanceTable(TableName tableName, computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps); if (calculatedMaxSteps > maxSteps) { LOG.warn( - "[{}] calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "maxSteps:{}. Hence load balancing may not work well. Setting parameter " + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue." + "(This config change does not require service restart)", - tableName.getNameWithNamespaceInclAsString(), calculatedMaxSteps, maxSteps); + calculatedMaxSteps, maxSteps); } } LOG.info( - "[{}] Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "functionCost={} computedMaxSteps={}", - tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), computedMaxSteps); + currentCost / sumMultiplier, functionCost(), computedMaxSteps); final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. @@ -593,22 +591,12 @@ protected List balanceTable(TableName tableName, // Should this be kept? if (newCost < currentCost) { - if(LOG.isTraceEnabled()) { - LOG.trace(" S[{}]: {} -> {} via {} -- {}", - step, currentCost, newCost, action, totalCostsPerFunc()); - } - currentCost = newCost; // save for JMX curOverallCost = currentCost; System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); } else { - if(LOG.isTraceEnabled()) { - LOG.trace(" S[{}]: {} -> {} via {} -- {}", - step, currentCost, newCost, action, totalCostsPerFunc()); - } - // Put things back the way they were before. // TODO: undo by remembering old values BalanceAction undoAction = action.undoAction(); @@ -628,19 +616,19 @@ protected List balanceTable(TableName tableName, updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); plans = createRegionPlans(cluster); LOG.info( - "[{}] Finished computing new moving plan. Computation took {} ms" + "Finished computing new moving plan. Computation took {} ms" + " to try {} different iterations. Found a solution that moves " + "{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. funtionCost={}", - tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), initCost / sumMultiplier, + endTime - startTime, step, plans.size(), initCost / sumMultiplier, currentCost / sumMultiplier, functionCost()); sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); return plans; } LOG.info( - "[{}] Could not find a better moving plan. Tried {} different configurations in " + "Could not find a better moving plan. Tried {} different configurations in " + "{} ms, and did not find anything with an imbalance score less than {}", - tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, initCost / sumMultiplier); + step, endTime - startTime, initCost / sumMultiplier); return null; } From 2337b4cdb6518e1782ddfd3f9c7b4a7c7641e189 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:47:14 -0500 Subject: [PATCH 117/126] Clean up RegionNormalizerFactory --- .../hbase/master/normalizer/RegionNormalizerFactory.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index 04241ed04f69..f855931e1a41 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -59,8 +59,8 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer Class balancerKlass = - conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - RegionNormalizer.class); + conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + RegionNormalizer.class); return ReflectionUtils.newInstance(balancerKlass, conf); } } From 6caeb487e1e5564485d3e3039244c1f1c73da209 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:51:26 -0500 Subject: [PATCH 118/126] clean imports --- .../src/main/java/org/apache/hadoop/hbase/ServerName.java | 1 + .../src/main/java/org/apache/hadoop/hbase/TableName.java | 1 + .../src/main/java/org/apache/hadoop/hbase/net/Address.java | 1 + .../hadoop/hbase/master/balancer/RegionLocationFinder.java | 1 + 4 files changed, 4 insertions(+) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index baf0968dcc80..5223bac3e5b1 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; + import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.collect.Interner; import org.apache.hbase.thirdparty.com.google.common.collect.Interners; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index 855daa560612..c799fb9b2f78 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -25,6 +25,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.yetus.audience.InterfaceAudience; + import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index 5397352868cf..ef3520b31c78 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; import org.apache.yetus.audience.InterfaceAudience; + import org.apache.hbase.thirdparty.com.google.common.base.Splitter; import org.apache.hbase.thirdparty.com.google.common.net.HostAndPort; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index cbab031e09d0..ab873380268d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -44,6 +44,7 @@ import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import org.apache.hbase.thirdparty.com.google.common.cache.CacheBuilder; import org.apache.hbase.thirdparty.com.google.common.cache.CacheLoader; import org.apache.hbase.thirdparty.com.google.common.cache.LoadingCache; From e8658502f3e138ab0d06d16dec660d36cc951e74 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:52:36 -0500 Subject: [PATCH 119/126] style --- .../hadoop/hbase/master/balancer/StochasticLoadBalancer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 12571ee4e369..98e2d9c0ceb3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -281,7 +281,7 @@ protected void loadConf(Configuration conf) { // HubSpot addition: if (cellCostFunction.getMultiplier() > 0) { - addCostFunction(cellCostFunction); + addCostFunction(cellCostFunction); } loadCustomCostFunctions(conf); From f8d48d9a224499e3932c35ce0d4899cc84548c96 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Thu, 5 Dec 2024 12:53:05 -0500 Subject: [PATCH 120/126] Style --- .../hadoop/hbase/master/normalizer/RegionNormalizerFactory.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index f855931e1a41..f97622b40631 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -64,4 +64,3 @@ private static RegionNormalizer getRegionNormalizer(Configuration conf) { return ReflectionUtils.newInstance(balancerKlass, conf); } } - From 70bc20fb6f47d5743374c350d6a095e58c7b64db Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 6 Dec 2024 14:26:20 -0500 Subject: [PATCH 121/126] Small clusters may not have enough regions/cell to support lower isolation --- .../hadoop/hbase/hubspot/HubSpotCellUtilities.java | 10 +++++++++- .../balancer/HubSpotCellBasedCandidateGenerator.java | 2 +- .../hbase/master/balancer/HubSpotCellCostFunction.java | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index 134617040c61..bc8278581689 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -1,5 +1,6 @@ package org.apache.hadoop.hbase.hubspot; +import com.google.common.primitives.Ints; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; @@ -31,7 +32,7 @@ public final class HubSpotCellUtilities { // TODO: this should be dynamically configured, not hard-coded, but this dramatically simplifies the initial version public static final short MAX_CELL_COUNT = 360; - public static final int MAX_CELLS_PER_RS = 36; + private static final int TARGET_MAX_CELLS_PER_RS = 36; public static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation() @@ -85,6 +86,13 @@ public final class HubSpotCellUtilities { private HubSpotCellUtilities() {} + public static int getMaxCellsPerRs(int servers) { + return Math.max( + TARGET_MAX_CELLS_PER_RS, + Ints.checkedCast( (long)Math.floor((double) MAX_CELL_COUNT / servers)) + ); + } + public static String toCellSetString(Set cells) { return cells.stream().sorted().map(x -> Short.toString(x)).collect(Collectors.joining(", ", "{", "}")); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java index ede3f1324a81..87a8817f668b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellBasedCandidateGenerator.java @@ -106,7 +106,7 @@ private BalanceAction generateAction( } int targetCellsPerServer = targetRegionsPerServer - numTimesCellRegionsFillAllServers; - targetCellsPerServer = Math.min(targetCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + targetCellsPerServer = Math.min(targetCellsPerServer, HubSpotCellUtilities.getMaxCellsPerRs(cluster.numServers)); Set serversBelowTarget = new HashSet<>(); Set serversAboveTarget = new HashSet<>(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 75500375b537..c0635ebea7ee 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -132,7 +132,7 @@ private void computeCostFromScratch() { } bestCaseMaxCellsPerServer -= numTimesCellRegionsFillAllServers; - bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.MAX_CELLS_PER_RS); + bestCaseMaxCellsPerServer = Math.min(bestCaseMaxCellsPerServer, HubSpotCellUtilities.getMaxCellsPerRs(cluster.numServers)); this.maxAcceptableCellsPerServer = bestCaseMaxCellsPerServer; this.balancedRegionsPerServer = Ints.checkedCast( (long) Math.floor((double) cluster.numRegions / cluster.numServers)); From f0cf9ff43fbfab4ba1c5a5c0186993e1c859869f Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Fri, 6 Dec 2024 14:36:23 -0500 Subject: [PATCH 122/126] Fix which Ints --- .../org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index bc8278581689..cee762b02140 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -1,12 +1,12 @@ package org.apache.hadoop.hbase.hubspot; -import com.google.common.primitives.Ints; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; +import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; import org.apache.hbase.thirdparty.com.google.common.primitives.Shorts; import org.apache.hbase.thirdparty.com.google.gson.ExclusionStrategy; import org.apache.hbase.thirdparty.com.google.gson.FieldAttributes; From 71ecc2be70b1e4a5ef0403179d2ef0969f12aa12 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 16 Dec 2024 11:23:50 -0500 Subject: [PATCH 123/126] Emit the cluster state at the end of balance --- .../balancer/HubSpotCellCostFunction.java | 30 ++++++++++--------- .../balancer/StochasticLoadBalancer.java | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index c0635ebea7ee..0321cf943628 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -24,7 +24,6 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; -import java.util.stream.Stream; import org.agrona.collections.Int2IntCounterMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ServerName; @@ -32,13 +31,13 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.hubspot.HubSpotCellUtilities; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMultimap; +import org.apache.hbase.thirdparty.com.google.common.collect.Iterables; import org.apache.hbase.thirdparty.com.google.common.primitives.Ints; /** @@ -93,18 +92,7 @@ void prepare(BalancerClusterState cluster) { regionIndexToServerIndex = cluster.regionIndexToServerIndex; servers = cluster.servers; - if (LOG.isTraceEnabled() - && isNeeded() - && cluster.regions != null - && cluster.regions.length > 0 - ) { - try { - LOG.trace("{} cluster state:\n{}", cluster.tables, HubSpotCellUtilities.OBJECT_MAPPER.toJson(cluster)); - } catch (Exception ex) { - LOG.error("Failed to write cluster state", ex); - } - } - + emitClusterState(); computeCostFromScratch(); if (regions.length > 0 @@ -118,6 +106,20 @@ && isNeeded() } } + void emitClusterState() { + if (LOG.isTraceEnabled() + && isNeeded() + && cluster.regions != null + && cluster.regions.length > 0) { + try { + LOG.trace("{} cluster state:\n{}", cluster.tables, + HubSpotCellUtilities.OBJECT_MAPPER.toJson(cluster)); + } catch (Exception ex) { + LOG.error("Failed to write cluster state", ex); + } + } + } + private void computeCostFromScratch() { this.serverHasCell = new boolean[numServers][numCells]; int bestCaseMaxCellsPerServer = Ints.checkedCast((long) Math.ceil((double) cluster.numRegions / cluster.numServers)); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 98e2d9c0ceb3..f210201e8e58 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -611,6 +611,7 @@ protected List balanceTable(TableName tableName, long endTime = EnvironmentEdgeManager.currentTime(); metricsBalancer.balanceCluster(endTime - startTime); + cellCostFunction.emitClusterState(); if (initCost > currentCost) { updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); From da0834ec752934ca4d7310f430fbf6e63f97d246 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Mon, 16 Dec 2024 12:33:37 -0500 Subject: [PATCH 124/126] Revert "Remove all the debugging changes, generally make ready for real review & merge" --- .../hbase/client/MutableRegionInfo.java | 23 ++-- .../org/apache/hadoop/hbase/ServerName.java | 7 +- .../org/apache/hadoop/hbase/TableName.java | 17 +-- .../org/apache/hadoop/hbase/net/Address.java | 3 +- .../master/balancer/BalancerClusterState.java | 100 +++++++++--------- .../master/balancer/RegionLocationFinder.java | 4 + .../balancer/StochasticLoadBalancer.java | 35 ++++-- .../normalizer/RegionNormalizerFactory.java | 4 +- 8 files changed, 111 insertions(+), 82 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java index a9382f3a9bed..81e6d478b79d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MutableRegionInfo.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,17 +55,17 @@ class MutableRegionInfo implements RegionInfo { // zookeeper as of 0.90.0 HBase. And now in DisableTableProcedure, finally we will create bunch // of UnassignProcedures and at the last of the procedure we will set the region state to // CLOSED, and will not change the offLine flag. - private boolean offLine; - private boolean split; - private final long regionId; - private final int replicaId; - private final byte[] regionName; - private final byte[] startKey; - private final byte[] endKey; - private final int hashCode; - private final String encodedName; - private final byte[] encodedNameAsBytes; - private final TableName tableName; + @Expose private boolean offLine; + @Expose private boolean split; + @Expose private final long regionId; + @Expose private final int replicaId; + @Expose private final byte[] regionName; + @Expose private final byte[] startKey; + @Expose private final byte[] endKey; + @Expose private final int hashCode; + @Expose private final String encodedName; + @Expose private final byte[] encodedNameAsBytes; + @Expose private final TableName tableName; private static int generateHashCode(final TableName tableName, final byte[] startKey, final byte[] endKey, final long regionId, final int replicaId, boolean offLine, diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 5223bac3e5b1..9f32e64b9ff4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.util.Addressing; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; @@ -82,15 +83,15 @@ public class ServerName implements Comparable, Serializable { */ public static final String UNKNOWN_SERVERNAME = "#unknown#"; - private final String serverName; - private final long startCode; + @Expose private final String serverName; + @Expose private final long startCode; private transient Address address; /** * Cached versioned bytes of this ServerName instance. * @see #getVersionedBytes() */ - private byte[] bytes; + @Expose private byte[] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList<>(0); /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java index c799fb9b2f78..0b077b724786 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/TableName.java @@ -24,6 +24,7 @@ import java.util.concurrent.CopyOnWriteArraySet; import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; @@ -93,14 +94,14 @@ public static boolean isMetaTableName(final TableName tn) { */ public static final TableName OLD_META_TABLE_NAME = getADummyTableName(OLD_META_STR); - private final byte[] name; - private final String nameAsString; - private final byte[] namespace; - private final String namespaceAsString; - private final byte[] qualifier; - private final String qualifierAsString; - private final boolean systemTable; - private final int hashCode; + @Expose private final byte[] name; + @Expose private final String nameAsString; + @Expose private final byte[] namespace; + @Expose private final String namespaceAsString; + @Expose private final byte[] qualifier; + @Expose private final String qualifierAsString; + @Expose private final boolean systemTable; + @Expose private final int hashCode; /** * Check passed byte array, "tableName", is legal user-space table name. diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index ef3520b31c78..5b35bfbd0edb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -21,6 +21,7 @@ import java.util.Iterator; import java.util.List; import org.apache.commons.lang3.StringUtils; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.apache.hbase.thirdparty.com.google.common.base.Splitter; @@ -37,7 +38,7 @@ */ @InterfaceAudience.Public public class Address implements Comparable
{ - private final HostAndPort hostAndPort; + @Expose private final HostAndPort hostAndPort; private Address(HostAndPort hostAndPort) { this.hostAndPort = hostAndPort; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index 4a9bdfee708a..de1cb5793017 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.net.Address; +import org.apache.hbase.thirdparty.com.google.gson.annotations.Expose; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,68 +53,68 @@ class BalancerClusterState { private static final Logger LOG = LoggerFactory.getLogger(BalancerClusterState.class); - ServerName[] servers; + @Expose ServerName[] servers; // ServerName uniquely identifies a region server. multiple RS can run on the same host - String[] hosts; - String[] racks; - boolean multiServersPerHost = false; // whether or not any host has more than one server + @Expose String[] hosts; + @Expose String[] racks; + @Expose boolean multiServersPerHost = false; // whether or not any host has more than one server - ArrayList tables; - RegionInfo[] regions; - Deque[] regionLoads; + @Expose ArrayList tables; + @Expose RegionInfo[] regions; + @Expose Deque[] regionLoads; private RegionLocationFinder regionFinder; - int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality + @Expose int[][] regionLocations; // regionIndex -> list of serverIndex sorted by locality - int[] serverIndexToHostIndex; // serverIndex -> host index - int[] serverIndexToRackIndex; // serverIndex -> rack index + @Expose int[] serverIndexToHostIndex; // serverIndex -> host index + @Expose int[] serverIndexToRackIndex; // serverIndex -> rack index - int[][] regionsPerServer; // serverIndex -> region list - int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list - int[][] regionsPerHost; // hostIndex -> list of regions - int[][] regionsPerRack; // rackIndex -> region list - Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated + @Expose int[][] regionsPerServer; // serverIndex -> region list + @Expose int[] serverIndexToRegionsOffset; // serverIndex -> offset of region list + @Expose int[][] regionsPerHost; // hostIndex -> list of regions + @Expose int[][] regionsPerRack; // rackIndex -> region list + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerServer; // serverIndex -> counts of colocated // replicas by primary region index - Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerHost; // hostIndex -> counts of colocated replicas by // primary region index - Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by + @Expose Int2IntCounterMap[] colocatedReplicaCountsPerRack; // rackIndex -> counts of colocated replicas by // primary region index - int[][] serversPerHost; // hostIndex -> list of server indexes - int[][] serversPerRack; // rackIndex -> list of server indexes - int[] regionIndexToServerIndex; // regionIndex -> serverIndex - int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) - int[] regionIndexToTableIndex; // regionIndex -> tableIndex - int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions - int[] numRegionsPerTable; // tableIndex -> region count - int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS - int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary - boolean hasRegionReplicas = false; // whether there is regions with replicas - - Integer[] serverIndicesSortedByRegionCount; - Integer[] serverIndicesSortedByLocality; - - Map serversToIndex; - Map hostsToIndex; - Map racksToIndex; - Map tablesToIndex; - Map regionsToIndex; - float[] localityPerServer; - - int numServers; - int numHosts; - int numRacks; - int numTables; - int numRegions; - - int numMovedRegions = 0; // num moved regions from the initial configuration - Map> clusterState; + @Expose int[][] serversPerHost; // hostIndex -> list of server indexes + @Expose int[][] serversPerRack; // rackIndex -> list of server indexes + @Expose int[] regionIndexToServerIndex; // regionIndex -> serverIndex + @Expose int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) + @Expose int[] regionIndexToTableIndex; // regionIndex -> tableIndex + @Expose int[][] numRegionsPerServerPerTable; // tableIndex -> serverIndex -> # regions + @Expose int[] numRegionsPerTable; // tableIndex -> region count + @Expose int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + @Expose int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary + @Expose boolean hasRegionReplicas = false; // whether there is regions with replicas + + @Expose Integer[] serverIndicesSortedByRegionCount; + @Expose Integer[] serverIndicesSortedByLocality; + + @Expose Map serversToIndex; + @Expose Map hostsToIndex; + @Expose Map racksToIndex; + @Expose Map tablesToIndex; + @Expose Map regionsToIndex; + @Expose float[] localityPerServer; + + @Expose int numServers; + @Expose int numHosts; + @Expose int numRacks; + @Expose int numTables; + @Expose int numRegions; + + @Expose int numMovedRegions = 0; // num moved regions from the initial configuration + @Expose Map> clusterState; private final RackManager rackManager; // Maps region -> rackIndex -> locality of region on rack - private float[][] rackLocalities; + @Expose private float[][] rackLocalities; // Maps localityType -> region -> [server|rack]Index with highest locality - private int[][] regionsToMostLocalEntities; + @Expose private int[][] regionsToMostLocalEntities; static class DefaultRackManager extends RackManager { @Override @@ -407,6 +408,7 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, if (regionFinder != null) { // region location List loc = regionFinder.getTopBlockLocations(region); + LOG.debug("{} is located on {} server", region.getRegionNameAsString(), loc.size()); regionLocations[regionIndex] = new int[loc.size()]; for (int i = 0; i < loc.size(); i++) { regionLocations[regionIndex][i] = loc.get(i) == null @@ -415,6 +417,8 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex, ? -1 : serversToIndex.get(loc.get(i).getAddress())); } + } else { + LOG.warn("Region finder is null, not registering region {}", region.getRegionNameAsString()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java index ab873380268d..85b15599e580 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java @@ -245,11 +245,15 @@ protected List getTopBlockLocations(RegionInfo region, String curren * @return ordered list of hosts holding blocks of the specified region */ protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) { + String regionNameAsString = region.getRegionNameAsString(); + LOG.debug("Fetching top block locations for {}", regionNameAsString); try { TableDescriptor tableDescriptor = getTableDescriptor(region.getTable()); if (tableDescriptor != null) { + LOG.debug("Region {} is located on {}", regionNameAsString, tableDescriptor.getTableName().getNameAsString()); HDFSBlocksDistribution blocksDistribution = HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region); + LOG.debug("Top hosts for region {}: {}", regionNameAsString, blocksDistribution.getTopHosts()); return blocksDistribution; } } catch (IOException ioe) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index f210201e8e58..cefe23b26d71 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -448,7 +448,11 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { @RestrictedApi(explanation = "Should only be called in tests", link = "", allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java") BalanceAction nextAction(BalancerClusterState cluster) { - return getRandomGenerator().generate(cluster); + CandidateGenerator generator = getRandomGenerator(); + if (LOG.isTraceEnabled()) { + LOG.trace("Using generator {}", generator.getClass().getSimpleName()); + } + return generator.generate(cluster); } /** @@ -474,6 +478,7 @@ protected CandidateGenerator getRandomGenerator() { return candidateGenerators.get(i); } } + return candidateGenerators.get(candidateGenerators.size() - 1); } @@ -519,6 +524,8 @@ protected List balanceTable(TableName tableName, || (this.cellCostFunction != null && this.cellCostFunction.getMultiplier() > 0) ) { finder = this.regionFinder; + } else { + LOG.debug("Didn't detect a need for region finder, disabling"); } // The clusterState that is given to this method contains the state @@ -561,17 +568,17 @@ protected List balanceTable(TableName tableName, computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps); if (calculatedMaxSteps > maxSteps) { LOG.warn( - "calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "[{}] calculatedMaxSteps:{} for loadbalancer's stochastic walk is larger than " + "maxSteps:{}. Hence load balancing may not work well. Setting parameter " + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue." + "(This config change does not require service restart)", - calculatedMaxSteps, maxSteps); + tableName.getNameWithNamespaceInclAsString(), calculatedMaxSteps, maxSteps); } } LOG.info( - "Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "[{}] Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, " + "functionCost={} computedMaxSteps={}", - currentCost / sumMultiplier, functionCost(), computedMaxSteps); + tableName.getNameWithNamespaceInclAsString(), currentCost / sumMultiplier, functionCost(), computedMaxSteps); final String initFunctionTotalCosts = totalCostsPerFunc(); // Perform a stochastic walk to see if we can get a good fit. @@ -591,12 +598,22 @@ protected List balanceTable(TableName tableName, // Should this be kept? if (newCost < currentCost) { + if(LOG.isTraceEnabled()) { + LOG.trace(" S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); + } + currentCost = newCost; // save for JMX curOverallCost = currentCost; System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length); } else { + if(LOG.isTraceEnabled()) { + LOG.trace(" S[{}]: {} -> {} via {} -- {}", + step, currentCost, newCost, action, totalCostsPerFunc()); + } + // Put things back the way they were before. // TODO: undo by remembering old values BalanceAction undoAction = action.undoAction(); @@ -617,19 +634,19 @@ protected List balanceTable(TableName tableName, updateStochasticCosts(tableName, curOverallCost, curFunctionCosts); plans = createRegionPlans(cluster); LOG.info( - "Finished computing new moving plan. Computation took {} ms" + "[{}] Finished computing new moving plan. Computation took {} ms" + " to try {} different iterations. Found a solution that moves " + "{} regions; Going from a computed imbalance of {}" + " to a new imbalance of {}. funtionCost={}", - endTime - startTime, step, plans.size(), initCost / sumMultiplier, + tableName.getNameWithNamespaceInclAsString(), endTime - startTime, step, plans.size(), initCost / sumMultiplier, currentCost / sumMultiplier, functionCost()); sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step); return plans; } LOG.info( - "Could not find a better moving plan. Tried {} different configurations in " + "[{}] Could not find a better moving plan. Tried {} different configurations in " + "{} ms, and did not find anything with an imbalance score less than {}", - step, endTime - startTime, initCost / sumMultiplier); + tableName.getNameWithNamespaceInclAsString(), step, endTime - startTime, initCost / sumMultiplier); return null; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java index f97622b40631..2e18740d3d30 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/normalizer/RegionNormalizerFactory.java @@ -59,8 +59,8 @@ public static RegionNormalizerManager createNormalizerManager(final Configuratio private static RegionNormalizer getRegionNormalizer(Configuration conf) { // Create instance of Region Normalizer Class balancerKlass = - conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, - RegionNormalizer.class); + conf.getClass(HConstants.HBASE_MASTER_NORMALIZER_CLASS, SimpleRegionNormalizer.class, + RegionNormalizer.class); return ReflectionUtils.newInstance(balancerKlass, conf); } } From 25e0cf948f05f851dd93f9878e37ebbbb499a4da Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Wed, 18 Dec 2024 10:22:07 -0500 Subject: [PATCH 125/126] Measure this distance by region count from balanced --- .../balancer/HubSpotCellCostFunction.java | 41 +++++++++++-------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java index 0321cf943628..099f91a08ebb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/HubSpotCellCostFunction.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -68,8 +69,8 @@ public class HubSpotCellCostFunction extends CostFunction { private int balancedRegionsPerServer; private int numServerCellsOutsideDesiredBand; - private boolean[] serverIsBalanced; - private int numServersUnbalanced; + private int[] serverDistanceFromBalanced; + private int numRegionsAwayFromBalance; private double cost; HubSpotCellCostFunction(Configuration conf) { @@ -138,11 +139,13 @@ private void computeCostFromScratch() { this.maxAcceptableCellsPerServer = bestCaseMaxCellsPerServer; this.balancedRegionsPerServer = Ints.checkedCast( (long) Math.floor((double) cluster.numRegions / cluster.numServers)); - this.serverIsBalanced = new boolean[cluster.numServers]; + this.serverDistanceFromBalanced = new int[cluster.numServers]; IntStream.range(0, cluster.numServers) - .forEach(server -> serverIsBalanced[server] = isBalanced(server)); - this.numServersUnbalanced = - Ints.checkedCast(IntStream.range(0, cluster.numServers).filter(server -> !serverIsBalanced[server]).count()); + .forEach(server -> serverDistanceFromBalanced[server] = numRegionsAwayFromBalanced(server)); + List regionsPerServer = IntStream.range(0, cluster.numServers).boxed() + .map(server -> cluster.regionsPerServer[server].length).collect(Collectors.toList()); + this.numRegionsAwayFromBalance = + Ints.checkedCast(Arrays.stream(serverDistanceFromBalanced, 0, cluster.numServers).sum()); this.numServerCellsOutsideDesiredBand = calculateCurrentCountOfCellsOutsideDesiredBand( @@ -169,9 +172,15 @@ private boolean isNeeded(BalancerClusterState currentClusterState) { && currentClusterState.regions.length > 0; } - private boolean isBalanced(int server) { - return cluster.regionsPerServer[server].length >= balancedRegionsPerServer - && cluster.regionsPerServer[server].length <= balancedRegionsPerServer + 1; + private int numRegionsAwayFromBalanced(int server) { + int serverNumRegions = cluster.regionsPerServer[server].length; + if (serverNumRegions < balancedRegionsPerServer) { + return balancedRegionsPerServer - serverNumRegions; + } else if (balancedRegionsPerServer + 1 < serverNumRegions) { + return serverNumRegions - (balancedRegionsPerServer + 1); + } + + return 0; } @Override protected void regionMoved(int region, int oldServer, int newServer) { @@ -179,12 +188,12 @@ private boolean isBalanced(int server) { Set cellsOnRegion = HubSpotCellUtilities.toCells(movingRegion.getStartKey(), movingRegion.getEndKey(), numCells); - boolean isOldServerBalanced = isBalanced(oldServer); - this.serverIsBalanced[oldServer] = isOldServerBalanced; - boolean isNewServerBalanced = isBalanced(newServer); - this.serverIsBalanced[newServer] = isNewServerBalanced; - this.numServersUnbalanced = - Ints.checkedCast(IntStream.range(0, cluster.numServers).filter(server -> !serverIsBalanced[server]).count()); + int oldServerNumRegionsAwayFromBalanced = numRegionsAwayFromBalanced(oldServer); + this.serverDistanceFromBalanced[oldServer] = oldServerNumRegionsAwayFromBalanced; + int newServerNumRegionsAwayFromBalanced = numRegionsAwayFromBalanced(newServer); + this.serverDistanceFromBalanced[newServer] = newServerNumRegionsAwayFromBalanced; + this.numRegionsAwayFromBalance = + Ints.checkedCast(Arrays.stream(serverDistanceFromBalanced, 0, cluster.numServers).sum()); if (LOG.isDebugEnabled()) { LOG.debug("Evaluating move of region {} [{}, {}). Cells are {}.", @@ -254,7 +263,7 @@ private boolean isBalanced(int server) { private void recomputeCost() { double newCost = (double) numServerCellsOutsideDesiredBand / (maxAcceptableCellsPerServer * cluster.numServers) - + numServersUnbalanced; + + numRegionsAwayFromBalance; cost = newCost; } From c2fde1dfc02d654c188b17fc76f504a6c91f0f56 Mon Sep 17 00:00:00 2001 From: Elias Szabo Date: Tue, 7 Jan 2025 14:21:19 -0500 Subject: [PATCH 126/126] Set target to 20% of cells --- .../org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java index cee762b02140..7ec0f290deb5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/hubspot/HubSpotCellUtilities.java @@ -32,7 +32,7 @@ public final class HubSpotCellUtilities { // TODO: this should be dynamically configured, not hard-coded, but this dramatically simplifies the initial version public static final short MAX_CELL_COUNT = 360; - private static final int TARGET_MAX_CELLS_PER_RS = 36; + private static final int TARGET_MAX_CELLS_PER_RS = 72; public static final Gson OBJECT_MAPPER = new GsonBuilder() .excludeFieldsWithoutExposeAnnotation()