diff --git a/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp b/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp index 434cff6dba..78ef1ff884 100644 --- a/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp +++ b/third_party/intel/lib/TritonIntelGPUTransforms/AccelerateMatmul.cpp @@ -64,7 +64,6 @@ getWarpsPerTile(tt::DotOp dotOp, ceil(dpasCap.repeatCount, dpasCap.executionSize); uint32_t colRowRatio = ceil(dpasCap.executionSize, dpasCap.repeatCount); - llvm::errs() << "rowColRation: " << rowColRatio << ", colRowRatio: " << colRowRatio << ", ret: " << ret[0] << ", " << ret[1] << "\n"; int rowDim = order[rank - 2], colDim = order[rank - 1]; do { @@ -119,20 +118,19 @@ class BlockedToDPAS : public OpRewritePattern { unsigned opsPerChan = ttg::intel::DpasEncodingAttr::getOpsPerChannel(elemType); - SmallVector order = {0, 1}; + SmallVector order = {0, 1}; Operation *aOp = a.getDefiningOp(); if (isa(aOp)) { - auto valueToConvert = aOp->getOperand(0); - aOp = valueToConvert.getDefiningOp(); + auto valueToConvert = aOp->getOperand(0); + aOp = valueToConvert.getDefiningOp(); } if (aOp && isa(aOp)) { Attribute layout; - assert(aOp->getNumResults() == 1); - layout = - cast(aOp->getResult(0).getType()).getEncoding(); + assert(aOp->getNumResults() == 1); + layout = + cast(aOp->getResult(0).getType()).getEncoding(); order = triton::gpu::getOrder(layout); } - llvm::errs() << "order: " << order[0] << ", " << order[1] << "\n"; SmallVector warpsPerTile = getWarpsPerTile(dotOp, dpasCap, retShape, numWarps, order);