diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir
index 73faf7d9b7b9a3..984b3614eded56 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-gemm-lib.mlir
@@ -62,23 +62,15 @@ module {
                                              tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR>
 
     //
-    // Verify computed result (expected output, with only 20 nonzeros).
+    // Verify computed result.
     //
-    // CHECK:    ( ( 1, 39, 52, 0, 0, 0, 45, 51 ),
-    // CHECK-SAME: ( 0, 0, 0, 0, 0, 0, 0, 0 ),
-    // CHECK-SAME: ( 0, 0, 16, 0, 0, 0, 0, 0 ),
-    // CHECK-SAME: ( 0, 0, 0, 25, 0, 0, 0, 0 ),
-    // CHECK-SAME: ( 0, 0, 0, 0, 36, 0, 0, 0 ),
-    // CHECK-SAME: ( 0, 117, 158, 0, 0, 0, 135, 144 ),
-    // CHECK-SAME: ( 0, 156, 318, 0, 0, 0, 301, 324 ),
-    // CHECK-SAME: ( 0, 208, 430, 0, 0, 0, 405, 436 ) )
-    // CHECK-NEXT: 20
-    %d = sparse_tensor.convert %Ccsr : tensor<8x8xf32, #CSR> to tensor<8x8xf32>
-    %v = vector.transfer_read %d[%c0, %c0], %f0: tensor<8x8xf32>, vector<8x8xf32>
-    vector.print %v : vector<8x8xf32>
-    %nnz = sparse_tensor.number_of_entries %Ccsr : tensor<8x8xf32, #CSR>
-    %x = sparse_tensor.number_of_entries %Ccsr : tensor<8x8xf32, #CSR>
-    vector.print %nnz : index
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 20
+    // CHECK-NEXT: pos[1] : ( 0, 5, 5, 6, 7, 8, 12, 16, 20,
+    // CHECK-NEXT: crd[1] : ( 0, 1, 2, 6, 7, 2, 3, 4, 1, 2, 6, 7, 1, 2, 6, 7, 1, 2, 6, 7,
+    // CHECK-NEXT: values : ( 1, 39, 52, 45, 51, 16, 25, 36, 117, 158, 135, 144, 156, 318, 301, 324, 208, 430, 405, 436,
+    // CHECK-NEXT: ----
+    sparse_tensor.print %Ccsr : tensor<8x8xf32, #CSR>
 
     llvm.call @mgpuDestroySparseEnv(): () -> ()
     return
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matmul-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matmul-lib.mlir
index 67b6e60ab5c869..0e7c75bc41aae2 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matmul-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-matmul-lib.mlir
@@ -66,32 +66,16 @@ module {
     return %D: tensor<8x8xf32>
   }
 
+  // Helper to dump a dense 8x8 tensor as a series of row vectors.
   func.func @dump(%mat: tensor<8x8xf32>) {
     %f0 = arith.constant 0.0 : f32
     %c0 = arith.constant 0   : index
     %c1 = arith.constant 1   : index
-    %c2 = arith.constant 2   : index
-    %c3 = arith.constant 3   : index
-    %c4 = arith.constant 4   : index
-    %c5 = arith.constant 5   : index
-    %c6 = arith.constant 6   : index
-    %c7 = arith.constant 7   : index
-    %r0 = vector.transfer_read %mat[%c0,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r0 : vector<8xf32>
-    %r1 = vector.transfer_read %mat[%c1,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r1 : vector<8xf32>
-    %r2 = vector.transfer_read %mat[%c2,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r2 : vector<8xf32>
-    %r3 = vector.transfer_read %mat[%c3,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r3 : vector<8xf32>
-    %r4 = vector.transfer_read %mat[%c4,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r4 : vector<8xf32>
-    %r5 = vector.transfer_read %mat[%c5,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r5 : vector<8xf32>
-    %r6 = vector.transfer_read %mat[%c6,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r6 : vector<8xf32>
-    %r7 = vector.transfer_read %mat[%c7,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
-    vector.print %r7 : vector<8xf32>
+    %c8 = arith.constant 8   : index
+    scf.for %i = %c0 to %c8 step %c1 {
+      %v = vector.transfer_read %mat[%i,%c0], %f0 : tensor<8x8xf32>, vector<8xf32>
+      vector.print %v : vector<8xf32>
+    }
     return
   }
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
index 9b33f081512390..aad26556b00e1e 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
@@ -113,11 +113,13 @@ module {
     //
     // Print the result for verification.
     //
-    // CHECK: ( 11, 41.4, 42, 102.5, 93, 44.1, 164, 105.2, 255 )
-    //
-    %vm = sparse_tensor.values %0 : tensor<?x?xf32, #CSR> to memref<?xf32>
-    %vv = vector.transfer_read %vm[%c0], %d0 : memref<?xf32>, vector<9xf32>
-    vector.print %vv : vector<9xf32>
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 9
+    // CHECK-NEXT: pos[1] : ( 0, 2, 4, 5, 7, 9,
+    // CHECK-NEXT: crd[1] : ( 0, 3, 1, 4, 2, 0, 3, 1, 4,
+    // CHECK-NEXT: values : ( 11, 41.4, 42, 102.5, 93, 44.1, 164, 105.2, 255,
+    // CHECK-NEXT: ----
+    sparse_tensor.print %0 : tensor<?x?xf32, #CSR>
 
     // Create a much sparser sampling matrix.
     %t = arith.constant sparse<[[0,0], [0,1], [1,0], [3,4], [7,7]],
@@ -137,11 +139,14 @@ module {
     //
     // Print the result for verification.
     //
-    // CHECK: ( ( 17, 18, 0, 0, 0, 0, 0, 0 ), ( 19, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 20, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 21 ) )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 5
+    // CHECK-NEXT: pos[1] : ( 0, 2, 3, 3, 4, 4, 4, 4, 5,
+    // CHECK-NEXT: crd[1] : ( 0, 1, 0, 4, 7,
+    // CHECK-NEXT: values : ( 17, 18, 19, 20, 21,
+    // CHECK-NEXT: ----
     //
-    %d = sparse_tensor.convert %1 : tensor<?x?xf32, #CSR> to tensor<?x?xf32>
-    %mm = vector.transfer_read %d[%c0, %c0], %d0 : tensor<?x?xf32>, vector<8x8xf32>
-    vector.print %mm : vector<8x8xf32>
+    sparse_tensor.print %1 : tensor<?x?xf32, #CSR>
 
     // Release the resources.
     bufferization.dealloc_tensor %0 : tensor<?x?xf32, #CSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
index db5c154e5e438d..bd2c72271c613a 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
@@ -166,15 +166,22 @@ module {
     // the result (which is why the block sparse version has actual results
     // in the original zero positions).
     //
-    // CHECK:      ( 5, 10, 24, 19, 53, 42, 55, 56 )
-    // CHECK-NEXT: ( 5, 10, 8, 19, 24, 24, 40, 53, 42, 55, 56, 64 )
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 8
+    // CHECK-NEXT: pos[1] : ( 0, 3, 5, 7, 8,
+    // CHECK-NEXT: crd[1] : ( 0, 1, 4, 1, 5, 2, 3, 2,
+    // CHECK-NEXT: values : ( 5, 10, 24, 19, 53, 42, 55, 56,
+    // CHECK-NEXT: ----
     //
-    %v0 = sparse_tensor.values %0 : tensor<?x?xf32, #CSR> to memref<?xf32>
-    %vv0 = vector.transfer_read %v0[%c0], %d0 : memref<?xf32>, vector<8xf32>
-    vector.print %vv0 : vector<8xf32>
-    %v1 = sparse_tensor.values %1 : tensor<?x?xf32, #BSR> to memref<?xf32>
-    %vv1 = vector.transfer_read %v1[%c0], %d0 : memref<?xf32>, vector<12xf32>
-    vector.print %vv1 : vector<12xf32>
+    // CHECK:      ---- Sparse Tensor ----
+    // CHECK-NEXT: nse = 12
+    // CHECK-NEXT: pos[1] : ( 0, 2, 3,
+    // CHECK-NEXT: crd[1] : ( 0, 2, 1,
+    // CHECK-NEXT: values : ( 5, 10, 8, 19, 24, 24, 40, 53, 42, 55, 56, 64,
+    // CHECK-NEXT: ----
+    //
+    sparse_tensor.print %0 : tensor<?x?xf32, #CSR>
+    sparse_tensor.print %1 : tensor<?x?xf32, #BSR>
 
     // Release the resources.
     bufferization.dealloc_tensor %0 : tensor<?x?xf32, #CSR>