TensorPadding: squeeze writes only when the inner dimension is not padded and equals the block inner dimension

PiperOrigin-RevId: 286330042
Change-Id: Iacc6e5caca217d9f3f79108fc0c6affaa7a92911
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index e070d0b..561666c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -364,9 +364,12 @@
     // When possible we squeeze writes for the innermost (only if non-padded)
     // dimension with the first padded dimension. This allows to reduce the
     // number of calls to LinCopy and better utilize vector instructions.
-    const bool squeeze_writes = NumDims > 1 &&
-                                // inner dimension is not padded
-                                input_inner_dim_size == output_inner_dim_size;
+    const bool squeeze_writes =
+        NumDims > 1 &&
+        // inner dimension is not padded
+        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
+        // and equal to the block inner dimension
+        (input_inner_dim_size == output_inner_dim_size);
 
     const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;