From 57d4c6cf424892888866ed98551f769cb5656623 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 25 Aug 2021 16:42:14 -0700 Subject: [PATCH] =?utf8?q?replace=20`self.assertTrue(torch.allclose(..))`?= =?utf8?q?=20with=20`self.assertEqual(=E2=80=A6)`=20(#63637)?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Summary: Fixes https://github.com/pytorch/pytorch/issues/63565 Pull Request resolved: https://github.com/pytorch/pytorch/pull/63637 Reviewed By: malfet Differential Revision: D30541266 Pulled By: mruberry fbshipit-source-id: ab461949782c6908a589ea098fcfcf5c3e081ee6 --- test/custom_operator/test_custom_ops.py | 10 +++--- test/jit/test_freezing.py | 12 +++---- test/jit/test_tracer.py | 4 +-- test/package/test_directory_reader.py | 2 +- test/package/test_model.py | 8 ++--- test/package/test_package_fx.py | 8 ++--- test/package/test_package_script.py | 16 ++++----- test/quantization/core/test_workflow_module.py | 6 ++-- test/quantization/core/test_workflow_ops.py | 22 ++++++------ test/quantization/fx/test_equalize_fx.py | 10 +++--- test/quantization/fx/test_numeric_suite_fx.py | 6 ++-- test/quantization/fx/test_quantize_fx.py | 2 +- test/test_autograd.py | 6 ++-- test/test_bundled_images.py | 4 +-- test/test_cuda.py | 14 ++++---- test/test_jit.py | 6 ++-- test/test_nn.py | 46 +++++++++++++------------- test/test_overrides.py | 11 +++--- test/test_spectral_ops.py | 2 +- torch/testing/_internal/common_jit.py | 2 +- torch/testing/_internal/jit_utils.py | 2 +- 21 files changed, 101 insertions(+), 98 deletions(-) diff --git a/test/custom_operator/test_custom_ops.py b/test/custom_operator/test_custom_ops.py index 3937abd..356b493 100644 --- a/test/custom_operator/test_custom_ops.py +++ b/test/custom_operator/test_custom_ops.py @@ -44,8 +44,8 @@ class TestCustomOperators(TestCase): output.sum().backward(go, False, True) grad = torch.ones(5, 5) - self.assertTrue(torch.allclose(x.grad, y + grad)) - self.assertTrue(torch.allclose(y.grad, x + grad * 2)) + self.assertEqual(x.grad, y + grad) + self.assertEqual(y.grad, x + grad * 2) # Test with optional arg. 
x.grad.zero_() @@ -56,9 +56,9 @@ class TestCustomOperators(TestCase): go = torch.ones((), requires_grad=True) output.sum().backward(go, False, True) - self.assertTrue(torch.allclose(x.grad, y + grad)) - self.assertTrue(torch.allclose(y.grad, x + grad * 2)) - self.assertTrue(torch.allclose(z.grad, grad)) + self.assertEqual(x.grad, y + grad) + self.assertEqual(y.grad, x + grad * 2) + self.assertEqual(z.grad, grad) def test_calling_custom_op_with_autograd_in_nograd_mode(self): with torch.no_grad(): diff --git a/test/jit/test_freezing.py b/test/jit/test_freezing.py index 8e07af0..e9317b1 100644 --- a/test/jit/test_freezing.py +++ b/test/jit/test_freezing.py @@ -1877,7 +1877,7 @@ class TestFrozenOptimizations(JitTestCase): N, C, H, W, = 10, 3, 224, 224 inp = torch.randn(N, C, H, W) self.run_pass("convert_frozen_ops_to_mkldnn", mod.graph) - self.assertTrue(torch.allclose(model(inp), mod(inp))) + self.assertEqual(model(inp), mod(inp)) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") def test_pool2d_batchnorm(self): @@ -1901,7 +1901,7 @@ class TestFrozenOptimizations(JitTestCase): self.run_pass('dce', mod.graph) self.run_pass("convert_frozen_ops_to_mkldnn", mod.graph) FileCheck().check("aten::to_dense").check_next("return").run(mod.graph) - self.assertTrue(torch.allclose(sub_model(inp), mod(inp))) + self.assertEqual(sub_model(inp), mod(inp)) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") def test_pool3d_batchnorm(self): @@ -1925,7 +1925,7 @@ class TestFrozenOptimizations(JitTestCase): self.run_pass('dce', mod.graph) self.run_pass("convert_frozen_ops_to_mkldnn", mod.graph) FileCheck().check("aten::to_dense").check_next("return").run(mod.graph) - self.assertTrue(torch.allclose(sub_model(inp), mod(inp))) + self.assertEqual(sub_model(inp), mod(inp)) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") @skipIfNoTorchVision @@ -1964,7 +1964,7 @@ class TestFrozenOptimizations(JitTestCase): check_count("aten::to_dense", 1, exactly=True).run(mod.graph)) else: FileCheck().check_count("aten::to_dense", 1, exactly=True).check("aten::layer_norm").run(mod.graph) - self.assertTrue(torch.allclose(sub_model(param[2]), mod(param[2]), 1e-04, 1e-04)) + self.assertEqual(sub_model(param[2]), mod(param[2]), rtol=1e-04, atol=1e-04) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") @skipIfNoTorchVision @@ -2003,7 +2003,7 @@ class TestFrozenOptimizations(JitTestCase): inp = torch.randn(N, C, H, W) self.run_pass("convert_frozen_ops_to_mkldnn", mod.graph) FileCheck().check_count("aten::to_dense", 1, exactly=True).run(mod.graph) - self.assertTrue(torch.allclose(sub_model(inp), mod(inp))) + self.assertEqual(sub_model(inp), mod(inp)) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") def test_hardswish_hardsigmoid(self): @@ -2030,7 +2030,7 @@ class TestFrozenOptimizations(JitTestCase): x = torch.rand(size) # `inplace=False` is intentional, otherwise we modify the input # and we aren't testing aten impls anyways - self.assertTrue(torch.allclose(aten_op(x, inplace=False), m(x).to_dense())) + self.assertEqual(aten_op(x, inplace=False), m(x).to_dense()) @unittest.skipIf(not torch._C.has_mkldnn, "MKL-DNN build is disabled") def test_scalar_mul(self): diff --git a/test/jit/test_tracer.py b/test/jit/test_tracer.py index 247072f..1d95dc8 100644 --- a/test/jit/test_tracer.py +++ b/test/jit/test_tracer.py @@ -163,13 +163,13 @@ class TestTracer(JitTestCase): eager_out = mod(*test_inputs) traced_out = traced_func(*test_inputs) 
self.assertNotWarn(lambda: traced_func(*test_inputs), "Shouldn't throw slicing related warn here") - self.assertTrue(torch.allclose(eager_out, traced_out)) + self.assertEqual(eager_out, traced_out) test_inputs = (torch.randint(0, 50, (50, 50)), torch.tensor(12)) eager_out = mod(*test_inputs) traced_out = traced_func(*test_inputs) self.assertNotWarn(lambda: traced_func(*test_inputs), "Shouldn't throw slicing related warn here") - self.assertTrue(torch.allclose(eager_out, traced_out)) + self.assertEqual(eager_out, traced_out) def test_typeas_trace_check(self): diff --git a/test/package/test_directory_reader.py b/test/package/test_directory_reader.py index 93968d6..576a7f0 100644 --- a/test/package/test_directory_reader.py +++ b/test/package/test_directory_reader.py @@ -61,7 +61,7 @@ class DirectoryReaderTest(PackageTestCase): importer = PackageImporter(Path(temp_dir) / Path(filename).name) dir_mod = importer.load_pickle("model", "model.pkl") input = torch.rand(1, 3, 224, 224) - self.assertTrue(torch.allclose(dir_mod(input), resnet(input))) + self.assertEqual(dir_mod(input), resnet(input)) def test_loading_module(self): """ diff --git a/test/package/test_model.py b/test/package/test_model.py index f5e08b6..dc67ff5 100644 --- a/test/package/test_model.py +++ b/test/package/test_model.py @@ -49,7 +49,7 @@ class ModelTest(PackageTestCase): # test that it works input = torch.rand(1, 3, 224, 224) ref = resnet(input) - self.assertTrue(torch.allclose(r2(input), ref)) + self.assertEqual(r2(input), ref) # functions exist also to get at the private modules in each package torchvision = i.import_module("torchvision") @@ -81,7 +81,7 @@ class ModelTest(PackageTestCase): i2 = PackageImporter(f2) r3 = i2.load_pickle("model", "model.pkl") - self.assertTrue(torch.allclose(r3(input), ref)) + self.assertEqual(r3(input), ref) @skipIfNoTorchVision def test_model_save(self): @@ -159,7 +159,7 @@ class ModelTest(PackageTestCase): r = the_model(input) results.append(r) - self.assertTrue(torch.allclose(*results)) + self.assertEqual(*results) @skipIfNoTorchVision def test_script_resnet(self): @@ -188,7 +188,7 @@ class ModelTest(PackageTestCase): loaded = torch.jit.load(f2) input = torch.rand(1, 3, 224, 224) - self.assertTrue(torch.allclose((loaded(input)), resnet(input))) + self.assertEqual(loaded(input), resnet(input)) if __name__ == "__main__": diff --git a/test/package/test_package_fx.py b/test/package/test_package_fx.py index 7f31014..64d431c 100644 --- a/test/package/test_package_fx.py +++ b/test/package/test_package_fx.py @@ -36,7 +36,7 @@ class TestPackageFX(PackageTestCase): pi = PackageImporter(f) loaded_traced = pi.load_pickle("model", "model.pkl") input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded_traced(input), traced(input))) + self.assertEqual(loaded_traced(input), traced(input)) def test_package_then_fx(self): from package_a.test_module import SimpleTest @@ -52,7 +52,7 @@ class TestPackageFX(PackageTestCase): loaded = pi.load_pickle("model", "model.pkl") traced = symbolic_trace(loaded) input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded(input), traced(input))) + self.assertEqual(loaded(input), traced(input)) def test_package_fx_package(self): from package_a.test_module import SimpleTest @@ -87,7 +87,7 @@ class TestPackageFX(PackageTestCase): loaded2 = pi2.load_pickle("model", "model.pkl") input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded(input), loaded2(input))) + self.assertEqual(loaded(input), loaded2(input)) def test_package_fx_with_imports(self): import 
package_a.subpackage @@ -158,7 +158,7 @@ class TestPackageFX(PackageTestCase): self.assertEqual(loaded_gm.info, "secret") input_x = torch.randn(3) - self.assertTrue(torch.allclose(loaded_gm(input_x), gm(input_x))) + self.assertEqual(loaded_gm(input_x), gm(input_x)) if __name__ == "__main__": diff --git a/test/package/test_package_script.py b/test/package/test_package_script.py index 3bbaed0..ecacd79 100644 --- a/test/package/test_package_script.py +++ b/test/package/test_package_script.py @@ -51,7 +51,7 @@ class TestPackageScript(PackageTestCase): input = torch.tensor(1) - self.assertTrue(torch.allclose(scripted(input), scripted_loaded(input))) + self.assertEqual(scripted(input), scripted_loaded(input)) def test_different_package_interface(self): """Test a case where the interface defined in the package is @@ -149,7 +149,7 @@ class TestPackageScript(PackageTestCase): input = torch.rand(2, 3) loaded_script_class = diff_fake.MyScriptClass(input) orig_script_class = fake.MyScriptClass(input) - self.assertTrue(torch.allclose(loaded_script_class.bar, orig_script_class.foo)) + self.assertEqual(loaded_script_class.bar, orig_script_class.foo) def test_save_scriptmodule(self): """ @@ -506,7 +506,7 @@ class TestPackageScript(PackageTestCase): self.assertTrue(len(file_structure.children[".data"].children) == 1) input = torch.rand(2, 3, 4) - self.assertTrue(torch.allclose(loaded_mod_1(input), mod1(input))) + self.assertEqual(loaded_mod_1(input), mod1(input)) def test_load_shared_tensors(self): """ @@ -630,7 +630,7 @@ class TestPackageScript(PackageTestCase): loaded_mod = importer_0.load_pickle("model", "model.pkl") input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded_mod(input), orig_mod(input))) + self.assertEqual(loaded_mod(input), orig_mod(input)) scripted_mod = torch.jit.script(loaded_mod) @@ -643,7 +643,7 @@ class TestPackageScript(PackageTestCase): importer_1 = PackageImporter(buffer_1) loaded_mod_scripted = importer_1.load_pickle("res", "scripted_mod.pkl") - self.assertTrue(torch.allclose(loaded_mod_scripted(input), orig_mod(input))) + self.assertEqual(loaded_mod_scripted(input), orig_mod(input)) def test_mixing_packaged_and_inline_modules(self): """ @@ -680,7 +680,7 @@ class TestPackageScript(PackageTestCase): loaded_imported = importer.load_pickle("model", "imported.pkl") input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded_imported(input), imported_mod(input))) + self.assertEqual(loaded_imported(input), imported_mod(input)) self.assertEqual(loaded_inline("input"), inline_mod("input")) @skipIfNoTorchVision @@ -721,8 +721,8 @@ class TestPackageScript(PackageTestCase): loaded_imported = importer.load_pickle("model", "imported.pkl") input = torch.rand(2, 3) - self.assertTrue(torch.allclose(loaded_imported(input), imported_mod(input))) - self.assertTrue(torch.allclose(loaded_inline(input), inline_mod(input))) + self.assertEqual(loaded_imported(input), imported_mod(input)) + self.assertEqual(loaded_inline(input), inline_mod(input)) def test_tensor_sharing_pickle(self): """Test that saving a ScriptModule and a separately saving a tensor diff --git a/test/quantization/core/test_workflow_module.py b/test/quantization/core/test_workflow_module.py index 2298653..b7782ec 100644 --- a/test/quantization/core/test_workflow_module.py +++ b/test/quantization/core/test_workflow_module.py @@ -205,11 +205,11 @@ class TestObserver(QuantizationTestCase): if reduce_range: ref_scales = [s * 255 / 127 for s in ref_scales] ref_zero_points = [math.floor(z / 2) for z in ref_zero_points] - 
self.assertTrue(torch.allclose(qparams[0], torch.tensor(ref_scales, dtype=qparams[0].dtype), atol=0.0001)) + self.assertEqual(qparams[0], torch.tensor(ref_scales, dtype=qparams[0].dtype), rtol=1e-5, atol=0.0001) if qscheme == torch.per_channel_affine_float_qparams: - self.assertTrue(torch.allclose(qparams[1], torch.tensor(ref_zero_points, dtype=qparams[1].dtype), atol=1)) + self.assertEqual(qparams[1], torch.tensor(ref_zero_points, dtype=qparams[1].dtype), rtol=1e-5, atol=1) else: - self.assertTrue(torch.allclose(qparams[1], torch.tensor(ref_zero_points, dtype=qparams[1].dtype))) + self.assertEqual(qparams[1], torch.tensor(ref_zero_points, dtype=qparams[1].dtype)) # Test for serializability diff --git a/test/quantization/core/test_workflow_ops.py b/test/quantization/core/test_workflow_ops.py index 9fcf5ac..60cd043 100644 --- a/test/quantization/core/test_workflow_ops.py +++ b/test/quantization/core/test_workflow_ops.py @@ -312,13 +312,13 @@ class TestFakeQuantizeOps(TestCase): X1 = torch.randn(5, 5).to(torch.float16) Y1 = torch.fake_quantize_per_tensor_affine(X1, scale, zero, mini, maxi) Y1r = _fake_quantize_per_tensor_affine_reference(X1, scale, zero, mini, maxi) - self.assertTrue(torch.allclose(Y1, Y1r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y1, Y1r, rtol=tolerance, atol=tolerance) # to force overflow X2 = torch.tensor(2**15 + .01).to(torch.float16) Y2 = torch.fake_quantize_per_tensor_affine(X2, scale, zero, mini, maxi) Y2r = _fake_quantize_per_tensor_affine_reference(X2, scale, zero, mini, maxi) - self.assertTrue(torch.allclose(Y2, Y2r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y2, Y2r, rtol=tolerance, atol=tolerance) scale = 10 @@ -326,7 +326,7 @@ class TestFakeQuantizeOps(TestCase): X3 = torch.tensor(2**-24).to(torch.float16) Y3 = torch.fake_quantize_per_tensor_affine(X3, scale, zero, mini, maxi) Y3r = _fake_quantize_per_tensor_affine_reference(X3, scale, zero, mini, maxi) - self.assertTrue(torch.allclose(Y3, Y3r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y3, Y3r, rtol=tolerance, atol=tolerance) def _test_forward_per_tensor_cachemask_impl(self, device): float_types = (torch.float32, torch.float16, torch.float64) @@ -347,7 +347,7 @@ class TestFakeQuantizeOps(TestCase): X, scale, zero_point, quant_min, quant_max) Y_ref = _fake_quantize_per_tensor_affine_reference( X, scale, zero_point, quant_min, quant_max).to(device) - self.assertTrue(torch.allclose(Y_test, Y_ref, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y_test, Y_ref, rtol=tolerance, atol=tolerance) self.assertTrue(Y_test.dtype == float_type) def test_forward_per_tensor_cachemask_cpu(self): @@ -380,14 +380,14 @@ class TestFakeQuantizeOps(TestCase): X, scale, zero_point, quant_min, quant_max) Y_ref = _fake_quantize_per_tensor_affine_reference( X, scale, zero_point, quant_min, quant_max).to(device) - self.assertTrue(torch.allclose(Y_test, Y_ref, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y_test, Y_ref, rtol=tolerance, atol=tolerance) # backward pass dout = torch.rand_like(X, dtype=torch.float).to(device) dX = _fake_quantize_per_tensor_affine_grad_reference( dout, X, scale, zero_point, quant_min, quant_max) Y_test.backward(dout) - self.assertTrue(torch.allclose(dX, X.grad)) + self.assertEqual(dX, X.grad) self.assertTrue(X.grad.dtype == float_type) def test_backward_per_tensor_cachemask_cpu(self): @@ -729,14 +729,14 @@ class TestFakeQuantizeOps(TestCase): X1 = torch.randn(4, 5).to(torch.float16) Y1 = torch.fake_quantize_per_channel_affine(X1, scale, zero, axis, mini, maxi) Y1r = 
_fake_quantize_per_channel_affine_reference(X1, scale, zero, axis, mini, maxi) - self.assertTrue(torch.allclose(Y1, Y1r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y1, Y1r, rtol=tolerance, atol=tolerance) # to force overflow X2 = torch.randn(4, 5).to(torch.float16) X2[0, 0] = 2**15 + .01 Y2 = torch.fake_quantize_per_channel_affine(X2, scale, zero, axis, mini, maxi) Y2r = _fake_quantize_per_channel_affine_reference(X2, scale, zero, axis, mini, maxi) - self.assertTrue(torch.allclose(Y2, Y2r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y2, Y2r, rtol=tolerance, atol=tolerance) scale = torch.zeros(5) + 10 @@ -745,7 +745,7 @@ class TestFakeQuantizeOps(TestCase): X3[0, 0] = 2**-24 Y3 = torch.fake_quantize_per_channel_affine(X3, scale, zero, axis, mini, maxi) Y3r = _fake_quantize_per_channel_affine_reference(X3, scale, zero, axis, mini, maxi) - self.assertTrue(torch.allclose(Y3, Y3r, rtol=tolerance, atol=tolerance)) + self.assertEqual(Y3, Y3r, rtol=tolerance, atol=tolerance) def _test_learnable_forward_per_channel(self, X_base, device, scale_base, zero_point_base, axis): r"""Tests the forward path of the learnable FakeQuantizePerTensorAffine op. @@ -1160,7 +1160,7 @@ class TestFusedObsFakeQuant(TestCase): dX = _fake_quantize_per_tensor_affine_grad_reference( dout, x, x_scale, x_zero_point, 0, 255) - self.assertTrue(torch.allclose(dX, x.grad)) + self.assertEqual(dX, x.grad) self.assertTrue(x.grad.dtype == torch.float32) @given(device=st.sampled_from(['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),) @@ -1206,7 +1206,7 @@ class TestFusedObsFakeQuant(TestCase): dX = _fake_quantize_per_tensor_affine_grad_reference( dout, x, x_scale, x_zero_point, 0, 255) - self.assertTrue(torch.allclose(dX, x.grad)) + self.assertEqual(dX, x.grad) self.assertTrue(x.grad.dtype == torch.float32) if __name__ == '__main__': diff --git a/test/quantization/fx/test_equalize_fx.py b/test/quantization/fx/test_equalize_fx.py index 7c17d12..a74b174 100644 --- a/test/quantization/fx/test_equalize_fx.py +++ b/test/quantization/fx/test_equalize_fx.py @@ -217,10 +217,10 @@ class TestEqualizeFx(QuantizationTestCase): ref_zero_points = -128 if weight_qdtype is torch.qint8 else 0 ref_zero_points = ref_zero_points - np.round(ref_min_weights_scaled / ref_scales) - self.assertTrue(torch.allclose(weight_qparams[0], torch.tensor( - ref_scales, dtype=weight_qparams[0].dtype), atol=0.0001)) - self.assertTrue(torch.allclose(weight_qparams[1], torch.tensor( - ref_zero_points, dtype=weight_qparams[1].dtype), atol=1)) + self.assertEqual(weight_qparams[0], torch.tensor( + ref_scales, dtype=weight_qparams[0].dtype), rtol=1e-5, atol=0.0001) + self.assertEqual(weight_qparams[1], torch.tensor( + ref_zero_points, dtype=weight_qparams[1].dtype), rtol=1e-5, atol=1) def test_input_weight_equalization_prepare(self): """ Tests that graphs created after prepare_fx is as expected @@ -783,7 +783,7 @@ class TestEqualizeFx(QuantizationTestCase): prepared(x) equalized_and_quantized = convert_fx(prepared) # Check if compile equalized_and_quantized_output = equalized_and_quantized(x) - self.assertTrue(torch.allclose(quantized_output, equalized_and_quantized_output, atol=0.1)) + self.assertEqual(quantized_output, equalized_and_quantized_output, rtol=1e-5, atol=0.1) @skipIfNoFBGEMM def test_selective_equalization(self): diff --git a/test/quantization/fx/test_numeric_suite_fx.py b/test/quantization/fx/test_numeric_suite_fx.py index 61062fb..3e627f5 100644 --- a/test/quantization/fx/test_numeric_suite_fx.py +++ 
b/test/quantization/fx/test_numeric_suite_fx.py @@ -1834,8 +1834,8 @@ class TestFXNumericSuiteCoreAPIs(FXNumericSuiteQuantizationTestCase): mp_ns, mc_ns = add_loggers('fp32', mp, 'int8', mc, OutputLogger) ref_fp32_ns = mp_ns(datum) ref_int8_ns = mc_ns(datum) - self.assertTrue(torch.allclose(ref_fp32, ref_fp32_ns)) - self.assertTrue(torch.allclose(ref_int8, ref_int8_ns)) + self.assertEqual(ref_fp32, ref_fp32_ns) + self.assertEqual(ref_int8, ref_int8_ns) @skipIfNoFBGEMM def test_shadow_loggers_preserve_qat_numerics(self): @@ -1852,7 +1852,7 @@ class TestFXNumericSuiteCoreAPIs(FXNumericSuiteQuantizationTestCase): mc_shadows_mp = add_shadow_loggers('int8', mc, 'fp32', mp, OutputLogger) ref_shadow = mc_shadows_mp(datum) - self.assertTrue(torch.allclose(ref_fp32, ref_shadow)) + self.assertEqual(ref_fp32, ref_shadow) class TestFXNumericSuiteCoreAPIsModels(FXNumericSuiteQuantizationTestCase): """ diff --git a/test/quantization/fx/test_quantize_fx.py b/test/quantization/fx/test_quantize_fx.py index 1bc6b61..08474d2 100644 --- a/test/quantization/fx/test_quantize_fx.py +++ b/test/quantization/fx/test_quantize_fx.py @@ -4668,7 +4668,7 @@ class TestQuantizeFxOps(QuantizationTestCase): m2q = torch.quantization.convert(m2p) q_result2 = m2q(data) # verify results match - self.assertTrue(torch.allclose(q_result1, q_result2)) + self.assertEqual(q_result1, q_result2) @unittest.skipUnless('qnnpack' in supported_qengines, "This Pytorch Build has not been built with or does not support QNNPACK") diff --git a/test/test_autograd.py b/test/test_autograd.py index 126d923..4d41645 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -2801,11 +2801,11 @@ class TestAutograd(TestCase): r1 = var1 * var1 * mean1 * mean1 r2 = var2 * var2 * mean2 * mean2 - self.assertTrue(torch.allclose(r1, r2, rtol=0.01, atol=0.0)) + self.assertEqual(r1, r2, rtol=0.01, atol=0.0) torch.autograd.backward(r1, grad) torch.autograd.backward(r2, grad) - self.assertTrue(torch.allclose(input1.grad, input2.grad, rtol=0.01, atol=0.0)) + self.assertEqual(input1.grad, input2.grad, rtol=0.01, atol=0.0) @slowTest @skipIfNoLapack @@ -5159,7 +5159,7 @@ for shape in [(1,), ()]: # TODO: this is a bug! 
# once this is fixed, it should have the transpose removed: - # self.assertTrue(torch.allclose(non_inplace_grad, inplace_grad)) + # self.assertEqual(non_inplace_grad, inplace_grad) self.assertEqual(non_inplace_grad.T, inplace_grad) def test_autograd_multiple_views_python(self): diff --git a/test/test_bundled_images.py b/test/test_bundled_images.py index 0c95ae3..7efd401 100644 --- a/test/test_bundled_images.py +++ b/test/test_bundled_images.py @@ -67,7 +67,7 @@ class TestBundledImages(TestCase): self.assertEqual(len(inflated), 1) self.assertEqual(len(inflated[0]), 1) self.assertEqual(raw_data.shape, decoded_data.shape) - self.assertTrue(torch.allclose(raw_data, decoded_data, atol=0.1, rtol=1e-01)) + self.assertEqual(raw_data, decoded_data, atol=0.1, rtol=1e-01) # Check if fb::image_decode_to_NCHW works as expected with open("caffe2/test/test_img/p1.jpg", "rb") as fp: @@ -76,4 +76,4 @@ class TestBundledImages(TestCase): byte_tensor = torch.tensor(list(fp.read())).byte() im2_tensor = torch.ops.fb.image_decode_to_NCHW(byte_tensor, weight, bias) self.assertEqual(raw_data.shape, im2_tensor.shape) - self.assertTrue(torch.allclose(raw_data, im2_tensor, atol=0.1, rtol=1e-01)) + self.assertEqual(raw_data, im2_tensor, atol=0.1, rtol=1e-01) diff --git a/test/test_cuda.py b/test/test_cuda.py index 55bab2e..e90cb17 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -2036,7 +2036,7 @@ torch.cuda.synchronize() else: self.assertEqual(found_inf, 0.0) for grad in grads: - self.assertTrue(torch.allclose(grad, torch.ones_like(grad), atol=1e-7)) + self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) # When passing lists with mismatched dtypes to a raw # _amp_foreach_non_finite_check_and_unscale_ call, @@ -2044,7 +2044,7 @@ torch.cuda.synchronize() grads = [g.clone(), g.to(dtype=torch.float16)] torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale) for grad in grads: - self.assertTrue(torch.allclose(grad, torch.ones_like(grad), atol=1e-7)) + self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) # Passing lists with mismatched devices to a raw # _amp_foreach_non_finite_check_and_unscale_ call should raise errors. @@ -2084,7 +2084,7 @@ torch.cuda.synchronize() # No inf was injected, ensures unscaling worked normally. self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 0) for grad in grads: - self.assertTrue(torch.allclose(grad, torch.ones_like(grad), atol=1e-7)) + self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) else: # inf was injected, ensures inf was found. self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 1) @@ -2136,7 +2136,7 @@ torch.cuda.synchronize() found_inf.zero_() found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur] self.assertEqual(found_inf, 0.0) - self.assertTrue(torch.allclose(p.grad.to_dense(), (s / 4).to_dense())) + self.assertEqual(p.grad.to_dense(), (s / 4).to_dense()) v = torch.FloatTensor([16., 32., float('inf')]) p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device="cuda", dtype=dtype) @@ -2158,7 +2158,7 @@ torch.cuda.synchronize() found_inf.zero_() found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, True)[cur] self.assertEqual(found_inf, 0.0) - self.assertTrue(torch.allclose(p.grad.to_dense(), (s.half() / 4).to_dense())) + self.assertEqual(p.grad.to_dense(), (s.half() / 4).to_dense()) # Creates fp16 sparse tensor with duplicated indices (uncoalesced). 
The uncoalesced representation # does not overflow in fp16, but the coalesced representation would, because 64000 + 64000 > fp16 max. @@ -2465,7 +2465,7 @@ torch.cuda.synchronize() for c, s in zip(chain(mod_control0.parameters(), mod_control1.parameters()), chain(mod_scaling0.parameters(), mod_scaling1.parameters())): - self.assertTrue(torch.allclose(c, s, atol=1e-7)) + self.assertEqual(c, s, rtol=1e-5, atol=1e-7) @unittest.skipIf(not TEST_MULTIGPU, "only one GPU detected") def test_grad_scaling_multigpu(self): @@ -2534,7 +2534,7 @@ torch.cuda.synchronize() for c, s in zip(chain(mod_control0.parameters(), mod_control1.parameters()), chain(mod_scaling0.parameters(), mod_scaling1.parameters())): - self.assertTrue(torch.allclose(c, s, atol=1e-7)) + self.assertEqual(c, s, rtol=1e-5, atol=1e-7) def test_cublas_multiple_threads_same_device(self): # Note, these parameters should be very carefully tuned diff --git a/test/test_jit.py b/test/test_jit.py index 28de172..2595411 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -14954,7 +14954,7 @@ dedent """ attn_mask=mask)[0] # print("rel. error: ") # print(jit_out / py_out - 1) - self.assertTrue(torch.allclose(jit_out, py_out, atol=5e-4, rtol=1e-4)) + self.assertEqual(jit_out, py_out, atol=5e-4, rtol=1e-4) @unittest.skipIf(not RUN_CUDA, "no CUDA") def test_scriptmodule_multi_head_attn_cuda(self): @@ -14990,7 +14990,7 @@ dedent """ None, None, None, 0.0, model.mod.out_proj.weight, model.mod.out_proj.bias)[0] - self.assertTrue(torch.allclose(jit_out, py_out, atol=5e-4, rtol=1e-4)) + self.assertEqual(jit_out, py_out, atol=5e-4, rtol=1e-4) @unittest.skipIf(not RUN_CUDA, "no CUDA") def test_scriptmodule_transformer_cuda(self): @@ -15029,7 +15029,7 @@ dedent """ # print(jit_out/py_out-1) # print(torch.allclose(jit_out, py_out, atol=5e-4, rtol=1e-4)) - self.assertTrue(torch.allclose(jit_out, py_out, atol=5e-4, rtol=1e-4)) + self.assertEqual(jit_out, py_out, atol=5e-4, rtol=1e-4) def test_list_python_op(self): def python_list_op(lst): diff --git a/test/test_nn.py b/test/test_nn.py index 8c3541a..c6fe0b2 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -229,7 +229,7 @@ class TestAvgPool(TestCase): actual = torch.nn.functional.avg_pool2d(input[0], (i, j)) actual = actual.view(1, actual.numel()) expected = self._avg_pool2d(input, (i, j)) - self.assertTrue(torch.allclose(actual, expected, rtol=0, atol=1e-5)) + self.assertEqual(actual, expected, rtol=0, atol=1e-5) def test_avg_pool2d_with_zero_divisor(self): self.assertRaisesRegex(RuntimeError, "divisor must be not zero", @@ -244,7 +244,7 @@ class TestAvgPool(TestCase): actual = F.avg_pool2d(input[0], (i, j), divisor_override=divisor) actual = actual.view(1, actual.numel()) expected = self._sum_pool2d(input, (i, j)) / divisor - self.assertTrue(torch.allclose(actual, expected, rtol=0, atol=1e-5)) + self.assertEqual(actual, expected, rtol=0, atol=1e-5) def test_doubletensor_avg_pool3d(self): h, w, d = 5, 6, 7 @@ -255,7 +255,7 @@ class TestAvgPool(TestCase): actual = torch.nn.functional.avg_pool3d(input.unsqueeze(0), (i, j, k)) actual = actual.view(1, actual.numel()) expected = self._avg_pool3d(input, (i, j, k)) - self.assertTrue(torch.allclose(actual, expected, rtol=0, atol=1e-5)) + self.assertEqual(actual, expected, rtol=0, atol=1e-5) def test_doubletensor_avg_pool3d_with_divisor(self): h, w, d = 6, 5, 7 @@ -267,7 +267,7 @@ class TestAvgPool(TestCase): actual = torch.nn.functional.avg_pool3d(input.unsqueeze(0), (i, j, k), divisor_override=divisor) actual = actual.view(1, actual.numel()) expected = 
self._sum_pool3d(input, (i, j, k)) / divisor - self.assertTrue(torch.allclose(actual, expected, rtol=0, atol=1e-5)) + self.assertEqual(actual, expected, rtol=0, atol=1e-5) def test_avg_pool3d_with_zero_divisor(self): self.assertRaisesRegex(RuntimeError, "divisor must be not zero", @@ -2260,7 +2260,7 @@ class TestNN(NNTestCase): self.assertNotIn("weight", model._parameters) # Result should be skew-symmetric A = model.weight - self.assertTrue(torch.allclose(A, -A.T)) + self.assertEqual(A, -A.T) # Remove and check consistency parametrize.remove_parametrizations(model, "weight", leave_parametrized=False) self.assertFalse(hasattr(model, "parametrizations")) @@ -2277,7 +2277,7 @@ class TestNN(NNTestCase): self.assertNotIn("weight", model._parameters) # Result should be skew-symmetric A = model.weight - self.assertTrue(torch.allclose(A, -A.T)) + self.assertEqual(A, -A.T) # Remove and check consistency parametrize.remove_parametrizations(model, "weight", leave_parametrized=False) self.assertFalse(hasattr(model, "parametrizations")) @@ -2291,7 +2291,7 @@ class TestNN(NNTestCase): # Result should be orthogonal X = model.weight Id = torch.eye(X.size(0), device=X.device) - self.assertTrue(torch.allclose(X.T @ X, Id)) + self.assertEqual(X.T @ X, Id) # Structure tests self.assertTrue(hasattr(model, "parametrizations")) self.assertTrue(parametrize.is_parametrized(model)) @@ -2810,10 +2810,10 @@ class TestNN(NNTestCase): init_weight = model.weight.clone() parametrize.register_parametrization(model, "weight", RankOne()) # Projecting a rank 1 matrix onto the matrices of rank one does not change the matrix - self.assertTrue(torch.allclose(init_weight, model.weight)) + self.assertEqual(init_weight, model.weight) parametrize.register_parametrization(model, "weight", Double()) # The matrix now is twice the initial matrix - self.assertTrue(torch.allclose(2.0 * init_weight, model.weight)) + self.assertEqual(2.0 * init_weight, model.weight) # Multiplying by a scalar does not change the rank self.assertEqual(torch.linalg.matrix_rank(model.weight).item(), 1) @@ -11276,7 +11276,7 @@ class TestNN(NNTestCase): grads1 = torch.autograd.grad(layer_norm(x).sum(), x, create_graph=False)[0] grads2 = torch.autograd.grad(layer_norm(x).sum(), x, create_graph=True)[0] - self.assertTrue(torch.allclose(grads1, grads2, rtol, atol)) + self.assertEqual(grads1, grads2, rtol=rtol, atol=atol) if TEST_CUDA: x = x.to('cuda') @@ -11285,7 +11285,7 @@ class TestNN(NNTestCase): grads1 = torch.autograd.grad(layer_norm(x).sum(), x, create_graph=False)[0] grads2 = torch.autograd.grad(layer_norm(x).sum(), x, create_graph=True)[0] - self.assertTrue(torch.allclose(grads1, grads2, rtol, atol)) + self.assertEqual(grads1, grads2, rtol=rtol, atol=atol) def test_padding_list(self): # Padding can be a list, or tuple (regression test for gh-54452) @@ -11793,7 +11793,7 @@ class TestAddRelu(TestCase): relu_res = torch.relu(add_res) add_relu_res = torch._VF._add_relu(a, b) - self.assertTrue(torch.allclose(add_relu_res, relu_res)) + self.assertEqual(add_relu_res, relu_res) def test_add_relu_broadcasting(self): a = torch.rand((1, 32)) @@ -11802,7 +11802,7 @@ class TestAddRelu(TestCase): res = torch._VF._add_relu(a, b) broadcasted_res = torch._VF._add_relu(a, b_scalar) - self.assertTrue(torch.allclose(broadcasted_res, res)) + self.assertEqual(broadcasted_res, res) def add_test(test, decorator=None): @@ -14070,8 +14070,8 @@ class TestNNDeviceType(NNTestCase): self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) 
self.assertTrue(ref_out.is_contiguous()) - self.assertTrue(torch.allclose(out, ref_out)) - self.assertTrue(torch.allclose(input.grad, ref_input.grad)) + self.assertEqual(out, ref_out) + self.assertEqual(input.grad, ref_input.grad) helper(4, 8, 8, 8, 3) helper(4, 8, 8, 8, 3, count_include_pad=False, padding=1) @@ -14200,9 +14200,9 @@ class TestNNDeviceType(NNTestCase): self.assertTrue(ref_out.is_contiguous()) self.assertTrue(ind.is_contiguous(memory_format=torch.channels_last)) self.assertTrue(ref_ind.is_contiguous()) - self.assertTrue(torch.allclose(out, ref_out)) - self.assertTrue(torch.allclose(ind, ref_ind)) - self.assertTrue(torch.allclose(input.grad, ref_input.grad)) + self.assertEqual(out, ref_out) + self.assertEqual(ind, ref_ind) + self.assertEqual(input.grad, ref_input.grad) helper(4, 8, 8, 8, 7) helper(200, 512, 28, 28, 2) @@ -17180,7 +17180,7 @@ class TestNNDeviceType(NNTestCase): shape = tuple(32 if i != dim else 256 for i in range(4)) x = torch.randn(shape, device=device, requires_grad=True) F.max_pool3d(x, kernel_size=(1, 1, 1)).sum().backward() - self.assertTrue(torch.allclose(x.grad, torch.ones_like(x.grad))) + self.assertEqual(x.grad, torch.ones_like(x.grad)) # Check that clip_grad_norm_ raises an error if the total norm of the # parameters' gradients is non-finite @@ -17672,7 +17672,7 @@ class TestModuleGlobalHooks(TestCase): input = torch.randn(2, 2) output = module(input) - self.assertTrue(torch.allclose(torch.sigmoid(input), output)) + self.assertEqual(torch.sigmoid(input), output) # make sure hook removal is successful self.assertFalse(handle.id in handle.hooks_dict_ref()) @@ -17707,7 +17707,7 @@ class TestModuleGlobalHooks(TestCase): input = torch.randn(2, 2) output = module(input) - self.assertTrue(torch.allclose(torch.sigmoid(input), output)) + self.assertEqual(torch.sigmoid(input), output) # make sure hook removal is successful self.assertFalse(handle.id in handle.hooks_dict_ref()) @@ -18001,7 +18001,7 @@ class TestLazyModules(TestCase): module = TestModule() module.register_forward_pre_hook(hook_function) output = module(torch.zeros(2, 2)) - self.assertTrue(torch.allclose(output, torch.ones(2, 2))) + self.assertEqual(output, torch.ones(2, 2)) def test_lazy_forward_hook(self): """ @@ -18024,7 +18024,7 @@ class TestLazyModules(TestCase): module = TestModule() module.register_forward_hook(hook_function) output = module(torch.zeros(2, 2)) - self.assertTrue(torch.allclose(output, torch.ones(2, 2))) + self.assertEqual(output, torch.ones(2, 2)) @suppress_warnings def test_lazy_conv1d(self): diff --git a/test/test_overrides.py b/test/test_overrides.py index 4104437..a625237 100644 --- a/test/test_overrides.py +++ b/test/test_overrides.py @@ -762,6 +762,9 @@ class Wrapper: def __int__(self): return self.__torch_function__(torch.Tensor.__int__, (Wrapper,), (self,)) + def __len__(self): + return len(self._data) + # unwrap inputs if necessary def unwrap(v): @@ -782,15 +785,15 @@ class TestEinsumOverride(TestCase): def test_wrapper(self): x = Wrapper(torch.randn(5)) y = Wrapper(torch.randn(4)) - self.assertTrue(torch.allclose(torch.einsum('i,j->ij', x, y), - torch.ger(x, y))) + self.assertEqual(torch.einsum('i,j->ij', x, y)._data, + torch.ger(x, y)._data) # in the old einsum interface, `operands` is a list a = Wrapper(torch.randn(2, 3)) b = Wrapper(torch.randn(5, 3, 7)) c = Wrapper(torch.randn(2, 7)) - self.assertTrue(torch.allclose(torch.einsum('ik,jkl,il->ij', [a, b, c]), - torch.nn.functional.bilinear(a, c, b))) + self.assertEqual(torch.einsum('ik,jkl,il->ij', [a, b, 
c])._data, + torch.nn.functional.bilinear(a, c, b)._data) class TestGradCheckOverride(TestCase): "Test that wrappers work with gradcheck." diff --git a/test/test_spectral_ops.py b/test/test_spectral_ops.py index e7e4832..fdc8c01 100644 --- a/test/test_spectral_ops.py +++ b/test/test_spectral_ops.py @@ -658,7 +658,7 @@ class TestFFT(TestCase): # Test fftshift sorts the fftfreq output shifted = torch.fft.fftshift(x) - self.assertTrue(torch.allclose(shifted, shifted.sort().values)) + self.assertEqual(shifted, shifted.sort().values) self.assertEqual(sorted_fft_freqs, shifted) # And ifftshift is the inverse diff --git a/torch/testing/_internal/common_jit.py b/torch/testing/_internal/common_jit.py index 80cb4d0..3b62ced 100644 --- a/torch/testing/_internal/common_jit.py +++ b/torch/testing/_internal/common_jit.py @@ -136,7 +136,7 @@ def check_against_reference(self, func, reference_func, output_func, args, kwarg for g2, g2_test in zip(grads2, grads2_test): if g2 is None and g2_test is None: continue - self.assertTrue(torch.allclose(g2, g2_test, atol=5e-4, rtol=1e-4)) + self.assertEqual(g2, g2_test, atol=5e-4, rtol=1e-4) class JitCommonTestCase(TestCase): def createFunctionFromGraph(self, trace): diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py index 7f9fb97..50d8dac 100644 --- a/torch/testing/_internal/jit_utils.py +++ b/torch/testing/_internal/jit_utils.py @@ -594,7 +594,7 @@ class JitTestCase(JitCommonTestCase): for g2, g2_ge in zip(grads2, grads2_ge): if g2 is None and g2_ge is None: continue - self.assertTrue(torch.allclose(g2, g2_ge, atol=8e-4, rtol=8e-4)) + self.assertEqual(g2, g2_ge, atol=8e-4, rtol=8e-4) return ge -- 2.7.4
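For reference, the single pattern applied throughout this patch is sketched below on a toy test. This is a minimal sketch, not part of the patch itself: it assumes the PyTorch-internal TestCase from torch.testing._internal.common_utils, whose assertEqual understands tensor arguments and accepts rtol/atol overrides as a pair, and the class name ExamplePatternTest is hypothetical. It also illustrates why several hunks that previously passed only atol to torch.allclose now pass rtol=1e-5 explicitly: that value matches torch.allclose's default rtol, and assertEqual's tolerances are supplied together.

import torch
from torch.testing._internal.common_utils import TestCase, run_tests


class ExamplePatternTest(TestCase):
    def test_old_vs_new_style(self):
        x = torch.rand(5, 5)
        y = x + 1e-6  # small perturbation, well within the tolerances below

        # Old style: a bare boolean check; on failure it only reports
        # "False is not true". Note torch.allclose's implicit rtol=1e-5.
        self.assertTrue(torch.allclose(x, y, atol=1e-4))

        # New style: the same closeness comparison, but assertEqual
        # typically emits a descriptive mismatch message on failure.
        # The previously implicit rtol=1e-5 is now spelled out, since
        # rtol and atol are passed as a pair.
        self.assertEqual(x, y, rtol=1e-5, atol=1e-4)


if __name__ == "__main__":
    run_tests()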