docs/02_tests.dox

   1 namespace arm_compute
   2 {
   3 namespace test
   4 {
   5 /**
   6 @page tests Validation and benchmark tests
   7
   8 @tableofcontents
   9
  10 @section tests_overview Overview
  11
  12 Benchmark and validation tests are based on the same framework to set up and run
  13 the tests. In addition to running simple, self-contained test functions the
  14 framework supports fixtures and data test cases. The former allows sharing
  15 common setup routines between various backends, thus reducing the amount of
  16 duplicated code. The latter can be used to parameterize tests or fixtures with
  17 different inputs, e.g. different tensor shapes. One limitation is that
  18 tests/fixtures cannot be parameterized based on the data type if static type
  19 information is needed within the test (e.g. to validate the results).
  20
  21 @note By default tests are not built. To enable them you need to add validation_tests=1 and / or benchmark_tests=1 to your SCons line.
  22
  23 @note Tests are not included in the pre-built binary archive, you have to build them from sources.
  24
  25 @subsection tests_overview_structure Directory structure
  26
  27     .
  28     `-- tests <- Top level test directory. All files in here are shared among validation and benchmark.
  29         |-- framework <- Underlying test framework.
  30         |-- CL   \
  31         |-- NEON -> Backend specific files with helper functions etc.
  32         |-- benchmark <- Top level directory for the benchmarking files.
  33         |   |-- fixtures <- Fixtures for benchmark tests.
  34         |   |-- CL <- OpenCL backend test cases on a function level.
  35         |   |   `-- SYSTEM <- OpenCL system tests, e.g. whole networks
  36         |   `-- NEON <- Same for NEON
  37         |       `-- SYSTEM
  38         |-- datasets <- Datasets for benchmark and validation tests.
  39         |-- main.cpp <- Main entry point for the tests. Currently shared between validation and benchmarking.
  40         |-- networks <- Network classes for system level tests.
  41         `-- validation -> Top level directory for validation files.
  42             |-- CPP -> C++ reference code
  43             |-- CL   \
  44             |-- NEON -> Backend specific test cases
  45             `-- fixtures -> Fixtures shared among all backends. Used to setup target function and tensors.
  46
  47 @subsection tests_overview_fixtures Fixtures
  48
  49 Fixtures can be used to share common setup, teardown or even run tasks among
  50 multiple test cases. For that purpose a fixture can define a `setup`,
  51 `teardown` and `run` method. Additionally the constructor and destructor might
  52 also be customized.
  53
  54 An instance of the fixture is created immediately before the actual test is
  55 executed. After construction the @ref framework::Fixture::setup method is called. Then the test
  56 function or the fixtures `run` method is invoked. After test execution the
  57 @ref framework::Fixture::teardown method is called and lastly the fixture is destructed.
  58
  59 @subsubsection tests_overview_fixtures_fixture Fixture
  60
  61 Fixtures for non-parameterized tests are straightforward. The custom fixture
  62 class has to inherit from @ref framework::Fixture and choose to implement any of the
  63 `setup`, `teardown` or `run` methods. None of the methods takes any arguments
  64 or returns anything.
  65
  66     class CustomFixture : public framework::Fixture
  67     {
  68         void setup()
  69         {
  70             _ptr = malloc(4000);
  71         }
  72
  73         void run()
  74         {
  75             ARM_COMPUTE_ASSERT(_ptr != nullptr);
  76         }
  77
  78         void teardown()
  79         {
  80             free(_ptr);
  81         }
  82
  83         void *_ptr;
  84     };
  85
  86 @subsubsection tests_overview_fixtures_data_fixture Data fixture
  87
  88 The advantage of a parameterized fixture is that arguments can be passed to the setup method at runtime. To make this possible the setup method has to be a template with a type parameter for every argument (though the template parameter doesn't have to be used). All other methods remain the same.
  89
  90     class CustomFixture : public framework::Fixture
  91     {
  92     #ifdef ALTERNATIVE_DECLARATION
  93         template <typename ...>
  94         void setup(size_t size)
  95         {
  96             _ptr = malloc(size);
  97         }
  98     #else
  99         template <typename T>
 100         void setup(T size)
 101         {
 102             _ptr = malloc(size);
 103         }
 104     #endif
 105
 106         void run()
 107         {
 108             ARM_COMPUTE_ASSERT(_ptr != nullptr);
 109         }
 110
 111         void teardown()
 112         {
 113             free(_ptr);
 114         }
 115
 116         void *_ptr;
 117     };
 118
 119 @subsection tests_overview_test_cases Test cases
 120
 121 All following commands can be optionally prefixed with `EXPECTED_FAILURE_` or
 122 `DISABLED_`.
 123
 124 @subsubsection tests_overview_test_cases_test_case Test case
 125
 126 A simple test case function taking no inputs and having no (shared) state.
 127
 128 - First argument is the name of the test case (has to be unique within the
 129   enclosing test suite).
 130 - Second argument is the dataset mode in which the test will be active.
 131
 132
 133     TEST_CASE(TestCaseName, DatasetMode::PRECOMMIT)
 134     {
 135         ARM_COMPUTE_ASSERT_EQUAL(1 + 1, 2);
 136     }
 137
 138 @subsubsection tests_overview_test_cases_fixture_fixture_test_case Fixture test case
 139
 140 A simple test case function taking no inputs that inherits from a fixture. The
 141 test case will have access to all public and protected members of the fixture.
 142 Only the setup and teardown methods of the fixture will be used. The body of
 143 this function will be used as test function.
 144
 145 - First argument is the name of the test case (has to be unique within the
 146   enclosing test suite).
 147 - Second argument is the class name of the fixture.
 148 - Third argument is the dataset mode in which the test will be active.
 149
 150
 151     class FixtureName : public framework::Fixture
 152     {
 153         public:
 154             void setup() override
 155             {
 156                 _one = 1;
 157             }
 158
 159         protected:
 160             int _one;
 161     };
 162
 163     FIXTURE_TEST_CASE(TestCaseName, FixtureName, DatasetMode::PRECOMMIT)
 164     {
 165         ARM_COMPUTE_ASSERT_EQUAL(_one + 1, 2);
 166     }
 167
 168 @subsubsection tests_overview_test_cases_fixture_register_fixture_test_case Registering a fixture as test case
 169
 170 Allows a fixture to be used directly as a test case. Instead of defining a new
 171 test function the run method of the fixture will be executed.
 172
 173 - First argument is the name of the test case (has to be unique within the
 174   enclosing test suite).
 175 - Second argument is the class name of the fixture.
 176 - Third argument is the dataset mode in which the test will be active.
 177
 178
 179     class FixtureName : public framework::Fixture
 180     {
 181         public:
 182             void setup() override
 183             {
 184                 _one = 1;
 185             }
 186
 187             void run() override
 188             {
 189                 ARM_COMPUTE_ASSERT_EQUAL(_one + 1, 2);
 190             }
 191
 192         protected:
 193             int _one;
 194     };
 195
 196     REGISTER_FIXTURE_TEST_CASE(TestCaseName, FixtureName, DatasetMode::PRECOMMIT);
 197
 198
 199 @subsubsection tests_overview_test_cases_data_test_case Data test case
 200
 201 A parameterized test case function that has no (shared) state. The dataset will
 202 be used to generate versions of the test case with different inputs.
 203
 204 - First argument is the name of the test case (has to be unique within the
 205   enclosing test suite).
 206 - Second argument is the dataset mode in which the test will be active.
 207 - Third argument is the dataset.
 208 - Further arguments specify names of the arguments to the test function. The
 209   number must match the arity of the dataset.
 210
 211
 212     DATA_TEST_CASE(TestCaseName, DatasetMode::PRECOMMIT, framework::make("Numbers", {1, 2, 3}), num)
 213     {
 214         ARM_COMPUTE_ASSERT(num < 4);
 215     }
 216
 217 @subsubsection tests_overview_test_cases_fixture_data_test_case Fixture data test case
 218
 219 A parameterized test case that inherits from a fixture. The test case will have
 220 access to all public and protected members of the fixture. Only the setup and
 221 teardown methods of the fixture will be used. The setup method of the fixture
 222 needs to be a template and has to accept inputs from the dataset as arguments.
 223 The body of this function will be used as test function. The dataset will be
 224 used to generate versions of the test case with different inputs.
 225
 226 - First argument is the name of the test case (has to be unique within the
 227   enclosing test suite).
 228 - Second argument is the class name of the fixture.
 229 - Third argument is the dataset mode in which the test will be active.
 230 - Fourth argument is the dataset.
 231
 232
 233     class FixtureName : public framework::Fixture
 234     {
 235         public:
 236             template <typename T>
 237             void setup(T num)
 238             {
 239                 _num = num;
 240             }
 241
 242         protected:
 243             int _num;
 244     };
 245
 246     FIXTURE_DATA_TEST_CASE(TestCaseName, FixtureName, DatasetMode::PRECOMMIT, framework::make("Numbers", {1, 2, 3}))
 247     {
 248         ARM_COMPUTE_ASSERT(_num < 4);
 249     }
 250
 251 @subsubsection tests_overview_test_cases_register_fixture_data_test_case Registering a fixture as data test case
 252
 253 Allows a fixture to be used directly as a parameterized test case. Instead of
 254 defining a new test function the run method of the fixture will be executed.
 255 The setup method of the fixture needs to be a template and has to accept inputs
 256 from the dataset as arguments. The dataset will be used to generate versions of
 257 the test case with different inputs.
 258
 259 - First argument is the name of the test case (has to be unique within the
 260   enclosing test suite).
 261 - Second argument is the class name of the fixture.
 262 - Third argument is the dataset mode in which the test will be active.
 263 - Fourth argument is the dataset.
 264
 265
 266     class FixtureName : public framework::Fixture
 267     {
 268         public:
 269             template <typename T>
 270             void setup(T num)
 271             {
 272                 _num = num;
 273             }
 274
 275             void run() override
 276             {
 277                 ARM_COMPUTE_ASSERT(_num < 4);
 278             }
 279
 280         protected:
 281             int _num;
 282     };
 283
 284     REGISTER_FIXTURE_DATA_TEST_CASE(TestCaseName, FixtureName, DatasetMode::PRECOMMIT, framework::make("Numbers", {1, 2, 3}));
 285
 286 @section writing_tests Writing validation tests
 287
 288 Before starting a new test case have a look at the existing ones. They should
 289 provide a good overview of how test cases are structured.
 290
 291 - The C++ reference needs to be added to `tests/validation/CPP/`. The
 292   reference function is typically a template parameterized by the underlying
 293   value type of the `SimpleTensor`. This makes it easy to specialise for
 294   different data types.
 295 - If all backends have a common interface it makes sense to share the setup
 296   code. This can be done by adding a fixture in
 297   `tests/validation/fixtures/`. Inside of the `setup` method of a fixture
 298   the tensors can be created and initialised and the function can be configured
 299   and run. The actual test will only have to validate the results. To be shared
 300   among multiple backends the fixture class is usually a template that accepts
 301   the specific types (data, tensor class, function class etc.) as parameters.
 302 - The actual test cases need to be added for each backend individually.
 303   Typically there will be multiple tests for different data types and for
 304   different execution modes, e.g. precommit and nightly.
 305
 306 @section tests_running_tests Running tests
 307 @subsection tests_running_tests_benchmarking Benchmarking
 308 @subsubsection tests_running_tests_benchmarking_filter Filter tests
 309 All tests can be run by invoking
 310
 311     ./arm_compute_benchmark ./data
 312
 313 where `./data` contains the assets needed by the tests.
 314
 315 If only a subset of the tests has to be executed the `--filter` option takes a
 316 regular expression to select matching tests.
 317
 318     ./arm_compute_benchmark --filter='^NEON/.*AlexNet' ./data
 319
 320 @note Filtering will be much faster if the regular expression is anchored at the start ("^") or end ("$") of the line.
 321
 322 Additionally each test has a test id which can be used as a filter, too.
 323 However, the test id is not guaranteed to be stable when new tests are added.
 324 Only within a specific build will a test keep the same id.
 325
 326     ./arm_compute_benchmark --filter-id=10 ./data
 327
 328 All available tests can be displayed with the `--list-tests` switch.
 329
 330     ./arm_compute_benchmark --list-tests
 331
 332 More options can be found in the `--help` message.
 333
 334 @subsubsection tests_running_tests_benchmarking_runtime Runtime
 335 By default every test is run once on a single thread. The number of iterations
 336 can be controlled via the `--iterations` option and the number of threads via
 337 `--threads`.
 338
 339 @subsubsection tests_running_tests_benchmarking_output Output
 340 By default the benchmarking results are printed in a human readable format on
 341 the command line. The colored output can be disabled via `--no-color-output`.
 342 As an alternative output format JSON is supported and can be selected via
 343 `--log-format=json`. To write the output to a file instead of stdout the
 344 `--log-file` option can be used.
 345
 346 @subsubsection tests_running_tests_benchmarking_mode Mode
 347 Tests contain different datasets of different sizes, some of which will take several hours to run.
 348 You can select which datasets to use by using the `--mode` option; we recommend you use `--mode=precommit` to start with.
 349
 350 @subsubsection tests_running_tests_benchmarking_instruments Instruments
 351 You can use the `--instruments` option to select one or more instruments to measure the execution time of the benchmark tests.
 352
 353 `PMU` will try to read the CPU PMU events from the kernel (They need to be enabled on your platform)
 354
 355 `MALI` will try to collect Mali hardware performance counters. (You need to have a recent enough Mali driver)
 356
 357 `WALL_CLOCK_TIMER` will measure time using `gettimeofday`: this should work on all platforms.
 358
 359 You can pass a combination of these instruments: `--instruments=PMU,MALI,WALL_CLOCK_TIMER`
 360
 361 @note You need to make sure the instruments have been selected at compile time using the `pmu=1` or `mali=1` scons options.
 362
 363 @subsubsection tests_running_examples Examples
 364
 365 To run all the precommit validation tests:
 366
 367         LD_LIBRARY_PATH=. ./arm_compute_validation --mode=precommit
 368
 369 To run the OpenCL precommit validation tests:
 370
 371         LD_LIBRARY_PATH=. ./arm_compute_validation --mode=precommit --filter="^CL.*"
 372
 373 To run the NEON precommit benchmark tests with PMU and Wall Clock timer in milliseconds instruments enabled:
 374
 375         LD_LIBRARY_PATH=. ./arm_compute_benchmark --mode=precommit --filter="^NEON.*" --instruments="pmu,wall_clock_timer_ms" --iterations=10
 376
 377 To run the OpenCL precommit benchmark tests with OpenCL kernel timers in milliseconds enabled:
 378
 379         LD_LIBRARY_PATH=. ./arm_compute_benchmark --mode=precommit --filter="^CL.*" --instruments="opencl_timer_ms" --iterations=10
 380 */
 381 } // namespace test
 382 } // namespace arm_compute