Skip to content

Commit ec56feb

Browse files
Merge pull request #2997 from AlexandreSinger/feature-ap-mass-legalizer
[AP][MassLegalizer] Revisited Mass Legalizer
2 parents 796f652 + f336abb commit ec56feb

File tree

4 files changed

+138
-58
lines changed

4 files changed

+138
-58
lines changed

vpr/src/analytical_place/partial_legalizer.cpp

+105-57
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,7 @@ void BiPartitioningPartialLegalizer::spread_over_windows(std::vector<SpreadingWi
12711271
num_blocks_partitioned_ += window.contained_blocks.size();
12721272

12731273
// 2) Partition the window.
1274-
auto partitioned_window = partition_window(window);
1274+
auto partitioned_window = partition_window(window, group_id);
12751275

12761276
// 3) Partition the blocks.
12771277
partition_blocks_in_window(window, partitioned_window, group_id, p_placement);
@@ -1311,61 +1311,111 @@ void BiPartitioningPartialLegalizer::spread_over_windows(std::vector<SpreadingWi
13111311
VTR_ASSERT_SAFE(density_manager_->verify());
13121312
}
13131313

1314-
PartitionedWindow BiPartitioningPartialLegalizer::partition_window(SpreadingWindow& window) {
1314+
PartitionedWindow BiPartitioningPartialLegalizer::partition_window(
1315+
SpreadingWindow& window,
1316+
ModelGroupId group_id) {
1317+
1318+
// Search for the ideal partition line on the window. Here, we attempt each
1319+
// partition and measure how well this cuts the capacity of the region in
1320+
// half. Cutting the capacity of the region in half should allow the blocks
1321+
// within the region to also be cut in half (assuming a good initial window
1322+
// was chosen). This should allow the spreader to spread things more evenly
1323+
// and converge faster. Hence, it is worth spending more time trying to find
1324+
// better partition lines.
1325+
//
1326+
// Here, we compute the score of a partition as a number between 0 and 1
1327+
// which represents how balanced the partition is. 0 means that all of the
1328+
// capacity is on one side of the partition, 1 means that the capacities of
1329+
// the two partitions are perfectly balanced (equal on both sides).
1330+
float best_score = -1.0f;
13151331
PartitionedWindow partitioned_window;
1332+
const std::vector<int>& model_indices = model_grouper_.get_models_in_group(group_id);
13161333

1317-
// Select the partition direction.
1318-
// To keep it simple, we partition the direction which would cut the
1319-
// region the most.
1320-
// TODO: Should explore making the partition line based on the capacity
1321-
// of the two partitioned regions. We may want to cut the
1322-
// region in half such that the mass of the atoms contained within
1323-
// the two future regions is equal.
1324-
partitioned_window.partition_dir = e_partition_dir::VERTICAL;
1325-
if (window.region.height() > window.region.width())
1326-
partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
1327-
1328-
// To keep it simple, just cut the space in half.
1329-
// TODO: Should investigate other cutting techniques. Cutting perfectly
1330-
// in half may not be the most efficient technique.
1331-
SpreadingWindow& lower_window = partitioned_window.lower_window;
1332-
SpreadingWindow& upper_window = partitioned_window.upper_window;
1333-
partitioned_window.pivot_pos = 0.f;
1334-
if (partitioned_window.partition_dir == e_partition_dir::VERTICAL) {
1335-
// Find the x-coordinate of a cut line directly in the middle of the
1336-
// region. We floor this to prevent fractional cut lines.
1337-
double pivot_x = std::floor((window.region.xmin() + window.region.xmax()) / 2.0);
1334+
// First, try all of the vertical partitions.
1335+
double min_pivot_x = std::floor(window.region.xmin()) + 1.0;
1336+
double max_pivot_x = std::ceil(window.region.xmax()) - 1.0;
1337+
for (double pivot_x = min_pivot_x; pivot_x <= max_pivot_x; pivot_x++) {
1338+
// Cut the region at this cut line.
1339+
auto lower_region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1340+
window.region.ymin()),
1341+
vtr::Point<double>(pivot_x,
1342+
window.region.ymax()));
1343+
1344+
auto upper_region = vtr::Rect<double>(vtr::Point<double>(pivot_x,
1345+
window.region.ymin()),
1346+
vtr::Point<double>(window.region.xmax(),
1347+
window.region.ymax()));
1348+
1349+
// Compute the capacity of each partition for the models that we care
1350+
// about.
1351+
// TODO: This can be made better by looking at the mass of all blocks
1352+
// within the window and scaling the capacity based on that.
1353+
float lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices, lower_region).manhattan_norm();
1354+
lower_window_capacity = std::max(lower_window_capacity, 0.0f);
1355+
float upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices, upper_region).manhattan_norm();
1356+
upper_window_capacity = std::max(upper_window_capacity, 0.0f);
1357+
1358+
// Compute the score of this partition line. The score is simply just
1359+
// the minimum of the two capacities divided by the maximum of the
1360+
// two capacities.
1361+
float smaller_capacity = std::min(lower_window_capacity, upper_window_capacity);
1362+
float larger_capacity = std::max(lower_window_capacity, upper_window_capacity);
1363+
float cut_score = smaller_capacity / larger_capacity;
1364+
1365+
// If this is the best cut we have ever seen, save it as the result.
1366+
if (cut_score > best_score) {
1367+
best_score = cut_score;
1368+
partitioned_window.partition_dir = e_partition_dir::VERTICAL;
1369+
partitioned_window.pivot_pos = pivot_x;
1370+
partitioned_window.lower_window.region = lower_region;
1371+
partitioned_window.upper_window.region = upper_region;
1372+
}
1373+
}
13381374

1375+
// Next, try all of the horizontal partitions.
1376+
double min_pivot_y = std::floor(window.region.ymin()) + 1.0;
1377+
double max_pivot_y = std::ceil(window.region.ymax()) - 1.0;
1378+
for (double pivot_y = min_pivot_y; pivot_y <= max_pivot_y; pivot_y++) {
13391379
// Cut the region at this cut line.
1340-
lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1341-
window.region.ymin()),
1342-
vtr::Point<double>(pivot_x,
1343-
window.region.ymax()));
1344-
1345-
upper_window.region = vtr::Rect<double>(vtr::Point<double>(pivot_x,
1346-
window.region.ymin()),
1347-
vtr::Point<double>(window.region.xmax(),
1348-
window.region.ymax()));
1349-
partitioned_window.pivot_pos = pivot_x;
1350-
} else {
1351-
VTR_ASSERT(partitioned_window.partition_dir == e_partition_dir::HORIZONTAL);
1352-
// Similarly in the y direction, find the non-fractional y coordinate
1353-
// to make a horizontal cut.
1354-
double pivot_y = std::floor((window.region.ymin() + window.region.ymax()) / 2.0);
1355-
1356-
// Then cut the window.
1357-
lower_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1358-
window.region.ymin()),
1359-
vtr::Point<double>(window.region.xmax(),
1360-
pivot_y));
1361-
1362-
upper_window.region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1363-
pivot_y),
1364-
vtr::Point<double>(window.region.xmax(),
1365-
window.region.ymax()));
1366-
partitioned_window.pivot_pos = pivot_y;
1380+
auto lower_region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1381+
window.region.ymin()),
1382+
vtr::Point<double>(window.region.xmax(),
1383+
pivot_y));
1384+
1385+
auto upper_region = vtr::Rect<double>(vtr::Point<double>(window.region.xmin(),
1386+
pivot_y),
1387+
vtr::Point<double>(window.region.xmax(),
1388+
window.region.ymax()));
1389+
1390+
// Compute the capacity of each partition for the models that we care
1391+
// about.
1392+
// TODO: This can be made better by looking at the mass of all blocks
1393+
// within the window and scaling the capacity based on that.
1394+
float lower_window_capacity = capacity_prefix_sum_.get_sum(model_indices, lower_region).manhattan_norm();
1395+
lower_window_capacity = std::max(lower_window_capacity, 0.0f);
1396+
float upper_window_capacity = capacity_prefix_sum_.get_sum(model_indices, upper_region).manhattan_norm();
1397+
upper_window_capacity = std::max(upper_window_capacity, 0.0f);
1398+
1399+
// Compute the score of this partition line. The score is simply just
1400+
// the minimum of the two capacities divided by the maximum of the
1401+
// two capacities.
1402+
float smaller_capacity = std::min(lower_window_capacity, upper_window_capacity);
1403+
float larger_capacity = std::max(lower_window_capacity, upper_window_capacity);
1404+
float cut_score = smaller_capacity / larger_capacity;
1405+
1406+
// If this is the best cut we have ever seen, save it as the result.
1407+
if (cut_score > best_score) {
1408+
best_score = cut_score;
1409+
partitioned_window.partition_dir = e_partition_dir::HORIZONTAL;
1410+
partitioned_window.pivot_pos = pivot_y;
1411+
partitioned_window.lower_window.region = lower_region;
1412+
partitioned_window.upper_window.region = upper_region;
1413+
}
13671414
}
13681415

1416+
VTR_ASSERT_MSG(best_score >= 0.0f,
1417+
"Could not find a partition line for given window");
1418+
13691419
return partitioned_window;
13701420
}
13711421

@@ -1475,7 +1525,7 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
14751525
// NOTE: This needs to be an int in case the pivot is 0.
14761526
for (int i = window.contained_blocks.size() - 1; i >= (int)pivot; i--) {
14771527
const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(window.contained_blocks[i]);
1478-
VTR_ASSERT_SAFE(lower_window_underfill.is_non_negative());
1528+
VTR_ASSERT_SAFE(upper_window_underfill.is_non_negative());
14791529
upper_window_underfill -= blk_mass;
14801530
if (upper_window_underfill.is_non_negative())
14811531
upper_window.contained_blocks.push_back(window.contained_blocks[i]);
@@ -1490,8 +1540,6 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
14901540
// windows. To do this we sort the unplaced blocks by largest mass to
14911541
// smallest mass. Then we place each block in the bin with the highest
14921542
// underfill.
1493-
// FIXME: Above was the intuition; however, after experimentation, found that
1494-
// sorting by smallest mass to largest mass worked better...
14951543
// FIXME: I think large blocks (like carry chains) need to be handled specially
14961544
// early on. If they are put into a partition too late, they may have
14971545
// to create overfill! Perhaps the partitions can hold two lists.
@@ -1500,20 +1548,20 @@ void BiPartitioningPartialLegalizer::partition_blocks_in_window(
15001548
[&](APBlockId a, APBlockId b) {
15011549
const auto& blk_a_mass = density_manager_->mass_calculator().get_block_mass(a);
15021550
const auto& blk_b_mass = density_manager_->mass_calculator().get_block_mass(b);
1503-
return blk_a_mass.manhattan_norm() < blk_b_mass.manhattan_norm();
1551+
return blk_a_mass.manhattan_norm() > blk_b_mass.manhattan_norm();
15041552
});
15051553
for (APBlockId blk_id : unplaced_blocks) {
15061554
// Project the underfill from each window onto the mass. This gives us
15071555
// the underfill in the dimensions the mass cares about.
15081556
const PrimitiveVector& blk_mass = density_manager_->mass_calculator().get_block_mass(blk_id);
15091557
PrimitiveVector projected_lower_window_underfill = lower_window_underfill;
1510-
lower_window_underfill.project(blk_mass);
1558+
projected_lower_window_underfill.project(blk_mass);
15111559
PrimitiveVector projected_upper_window_underfill = upper_window_underfill;
1512-
upper_window_underfill.project(blk_mass);
1560+
projected_upper_window_underfill.project(blk_mass);
15131561
// Put the block in the window with a higher underfill. This tries to
15141562
// balance the overfill as much as possible. This works even if the
15151563
// overfill becomes negative.
1516-
if (projected_lower_window_underfill.manhattan_norm() >= projected_upper_window_underfill.manhattan_norm()) {
1564+
if (projected_lower_window_underfill.sum() >= projected_upper_window_underfill.sum()) {
15171565
lower_window.contained_blocks.push_back(blk_id);
15181566
lower_window_underfill -= blk_mass;
15191567
} else {

vpr/src/analytical_place/partial_legalizer.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ class BiPartitioningPartialLegalizer : public PartialLegalizer {
483483
* the direction of the partition (vertical / horizontal) and the position
484484
* of the cut.
485485
*/
486-
PartitionedWindow partition_window(SpreadingWindow& window);
486+
PartitionedWindow partition_window(SpreadingWindow& window, ModelGroupId group_id);
487487

488488
/**
489489
* @brief Partition the blocks in the given window into the partitioned

vpr/src/analytical_place/primitive_vector.h

+14
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,20 @@ class PrimitiveVector {
266266
return mag;
267267
}
268268

269+
/**
270+
* @brief Computes the sum across all dimensions of the vector.
271+
*
272+
* This is similar to manhattan_norm, however this does not take the
273+
* absolute value of each dimension.
274+
*/
275+
inline float sum() const {
276+
float sum = 0.f;
277+
for (const auto& p : data_) {
278+
sum += p.second;
279+
}
280+
return sum;
281+
}
282+
269283
/**
270284
* @brief Project this vector onto the given vector.
271285
*

vpr/test/test_ap_primitive_vector.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,24 @@ TEST_CASE("test_ap_primitive_vector_verify", "[vpr_ap]") {
241241
vec2 *= -1.f;
242242
REQUIRE(vec2.manhattan_norm() == vec1.manhattan_norm());
243243

244+
// sum:
245+
vec1.clear();
246+
// Sum of the zero vector is zero.
247+
REQUIRE(vec1.sum() == 0.f);
248+
// Sum of a non-negative vector is the sum of its dims.
249+
vec1.set_dim_val(0, 1.f);
250+
REQUIRE(vec1.sum() == 1.f);
251+
vec1.set_dim_val(1, 2.f);
252+
vec1.set_dim_val(2, 3.f);
253+
vec1.set_dim_val(3, 4.f);
254+
vec1.set_dim_val(4, 5.f);
255+
REQUIRE(vec1.sum() == 15.f);
256+
// Sum of a negative vector is the opposite of the sum of the absolute
257+
// value of its dims.
258+
vec2 = vec1;
259+
vec2 *= -1.f;
260+
REQUIRE(vec2.sum() == -1.f * vec1.sum());
261+
244262
// Projection:
245263
// Basic example:
246264
vec1.clear();

0 commit comments

Comments
 (0)